diff --git a/.env b/.env index d5cffcec0aa..9d604630073 100644 --- a/.env +++ b/.env @@ -1,5 +1,5 @@ APP_IMAGE=gdcc/dataverse:unstable -POSTGRES_VERSION=16 +POSTGRES_VERSION=17 DATAVERSE_DB_USER=dataverse SOLR_VERSION=9.3.0 -SKIP_DEPLOY=0 \ No newline at end of file +SKIP_DEPLOY=0 diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index b57aa23fc0f..f2a779bbf21 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,7 +2,7 @@ **Which issue(s) this PR closes**: -Closes # +- Closes # **Special notes for your reviewer**: diff --git a/.github/workflows/guides_build_sphinx.yml b/.github/workflows/guides_build_sphinx.yml index 992f30f2872..86b59b11d35 100644 --- a/.github/workflows/guides_build_sphinx.yml +++ b/.github/workflows/guides_build_sphinx.yml @@ -11,6 +11,6 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - - uses: OdumInstitute/sphinx-action@master + - uses: uncch-rdmc/sphinx-action@master with: docs-folder: "doc/sphinx-guides/" diff --git a/doc/release-notes/10169-JSON-schema-validation.md b/doc/release-notes/10169-JSON-schema-validation.md deleted file mode 100644 index 92ff4a917d5..00000000000 --- a/doc/release-notes/10169-JSON-schema-validation.md +++ /dev/null @@ -1,3 +0,0 @@ -### Improved JSON Schema validation for datasets - -Enhanced JSON schema validation with checks for required and allowed child objects, type checking for field types including `primitive`, `compound` and `controlledVocabulary`. More user-friendly error messages to help pinpoint the issues in the dataset JSON. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.3/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide and PR #10543. 
diff --git a/doc/release-notes/10287-use-support-address-in-system-email-text.md b/doc/release-notes/10287-use-support-address-in-system-email-text.md deleted file mode 100644 index 4c294404298..00000000000 --- a/doc/release-notes/10287-use-support-address-in-system-email-text.md +++ /dev/null @@ -1,4 +0,0 @@ -### Notification Email Improvement - -The system email text has been improved to use the support email address (`dataverse.mail.support-email`) in the text where it states; "contact us for support at", instead of the default system email address. -Using the system email address here was particularly problematic when it was a 'noreply' address. diff --git a/doc/release-notes/10341-croissant.md b/doc/release-notes/10341-croissant.md deleted file mode 100644 index 15bc7029099..00000000000 --- a/doc/release-notes/10341-croissant.md +++ /dev/null @@ -1,9 +0,0 @@ -A new metadata export format called Croissant is now available as an external metadata exporter. It is oriented toward making datasets consumable by machine learning. - -When enabled, Croissant replaces the Schema.org JSON-LD format in the `` of dataset landing pages. For details, see the [Schema.org JSON-LD/Croissant Metadata](https://dataverse-guide--10533.org.readthedocs.build/en/10533/admin/discoverability.html#schema-org-head) under the discoverability section of the Admin Guide. - -For more about the Croissant exporter, see https://github.com/gdcc/exporter-croissant - -For installation instructions, see [Enabling External Exporters](https://dataverse-guide--10533.org.readthedocs.build/en/10533/installation/advanced.html#enabling-external-exporters) in the Installation Guide. - -See also Issue #10341 and PR #10533. 
diff --git a/doc/release-notes/10379-MetricsBugsFixes.md b/doc/release-notes/10379-MetricsBugsFixes.md new file mode 100644 index 00000000000..0ebc6d99f0b --- /dev/null +++ b/doc/release-notes/10379-MetricsBugsFixes.md @@ -0,0 +1,10 @@ + +### Metrics API Bug fixes + +Two bugs in the Metrics API have been fixed: + +- The /datasets and /datasets/byMonth endpoints could report incorrect values if/when they have been called using the dataLocation parameter (which allows getting metrics for local, remote (harvested), or all datasets) as the metrics cache was not storing different values for these cases. + +- Metrics endpoints whose calculation relied on finding the latest published datasetversion were incorrect if/when the minor version number was > 9. + +When deploying the new release, the [/api/admin/clearMetricsCache](https://guides.dataverse.org/en/latest/api/native-api.html#metrics) API should be called to remove old cached values that may be incorrect. \ No newline at end of file diff --git a/doc/release-notes/10433-add-thumbnail-for-featured-dataverses.md b/doc/release-notes/10433-add-thumbnail-for-featured-dataverses.md deleted file mode 100644 index 0ebb84a8eb0..00000000000 --- a/doc/release-notes/10433-add-thumbnail-for-featured-dataverses.md +++ /dev/null @@ -1,5 +0,0 @@ -Add the ability to configure a thumbnail logo that is displayed for a collection when the collection is configured as a featured collection. If present, this thumbnail logo is shown. Otherwise, the collection logo is shown. Configuration is done under the "Theme" for a collection. - -The HTML preview of the documentation can be found [here](https://dataverse-guide--10433.org.readthedocs.build/en/10433/user/dataverse-management.html#theme). - -For more information, see [#10291](https://github.com/IQSS/dataverse/issues/10291). 
diff --git a/doc/release-notes/10478-version-base-image.md b/doc/release-notes/10478-version-base-image.md deleted file mode 100644 index 34f444a2122..00000000000 --- a/doc/release-notes/10478-version-base-image.md +++ /dev/null @@ -1,7 +0,0 @@ -### Adding versioned tags to Container Base Images - -With this release we introduce a detailed maintenance workflow for our container images. -As output of the GDCC Containerization Working Group, the community takes another step towards production ready containers available directly from the core project. - -The maintenance workflow regularly updates the Container Base Image, which contains the operating system, Java, Payara Application Server, as well as tools and libraries required by the Dataverse application. -Shipping these rolling releases as well as immutable revisions is the foundation for secure and reliable Dataverse Application Container images. diff --git a/doc/release-notes/10508-base-image-fixes.md b/doc/release-notes/10508-base-image-fixes.md deleted file mode 100644 index 148066435e8..00000000000 --- a/doc/release-notes/10508-base-image-fixes.md +++ /dev/null @@ -1,12 +0,0 @@ -# Security and Compatibility Fixes to the Container Base Image - -- Switch "wait-for" to "wait4x", aligned with the Configbaker Image -- Update "jattach" to v2.2 -- Install AMD64 / ARM64 versions of tools as necessary -- Run base image as unprivileged user by default instead of `root` - this was an oversight from OpenShift changes -- Linux User, Payara Admin and Domain Master passwords: - - Print hints about default, public knowledge passwords in place for - - Enable replacing these passwords at container boot time -- Enable building with updates Temurin JRE image based on Ubuntu 24.04 LTS -- Fix entrypoint script troubles with pre- and postboot script files -- Unify location of files at CONFIG_DIR=/opt/payara/config, avoid writing to other places \ No newline at end of file diff --git a/doc/release-notes/10517-datasetType.md 
b/doc/release-notes/10517-datasetType.md deleted file mode 100644 index 2e3aff940c7..00000000000 --- a/doc/release-notes/10517-datasetType.md +++ /dev/null @@ -1,10 +0,0 @@ -### Initial Support for Dataset Types - -Out of the box, all datasets have the type "dataset" but superusers can add additional types. At this time the type can only be set at creation time via API. The types "dataset", "software", and "workflow" will be sent to DataCite when the dataset is published. - -For details see and #10517. Please note that this feature is highly experimental and is expected to evolve. - -Upgrade instructions --------------------- - -Update your Solr schema.xml file to pick up the "datasetType" additions and do a full reindex. diff --git a/doc/release-notes/10583-dataset-unlink-functionality-same-permission-as-link.md b/doc/release-notes/10583-dataset-unlink-functionality-same-permission-as-link.md deleted file mode 100644 index f97bd252db3..00000000000 --- a/doc/release-notes/10583-dataset-unlink-functionality-same-permission-as-link.md +++ /dev/null @@ -1,2 +0,0 @@ -New "Unlink Dataset" button has been added to the Dataset Page to allow a user to unlink a dataset from a collection that was previously linked with the "Link Dataset" button. The user must possess the same permissions needed to unlink the Dataset as they would to link the Dataset. -The [existing API](https://guides.dataverse.org/en/6.3/admin/dataverses-datasets.html#unlink-a-dataset) for unlinking datasets has been updated to no longer require superuser access. The "Publish Dataset" permission is now enough. 
diff --git a/doc/release-notes/10606-dataverse-in-windows-wsl.md b/doc/release-notes/10606-dataverse-in-windows-wsl.md deleted file mode 100644 index 9501d6e3090..00000000000 --- a/doc/release-notes/10606-dataverse-in-windows-wsl.md +++ /dev/null @@ -1 +0,0 @@ -New instructions have been added for developers on Windows trying to run a Dataverse development environment using Windows Subsystem for Linux (WSL). See https://dataverse-guide--10608.org.readthedocs.build/en/10608/developers/windows.html #10606 and #10608. diff --git a/doc/release-notes/10623-globus-improvements.md b/doc/release-notes/10623-globus-improvements.md deleted file mode 100644 index 9eb529bc4f7..00000000000 --- a/doc/release-notes/10623-globus-improvements.md +++ /dev/null @@ -1 +0,0 @@ -A new alternative implementation of Globus polling during upload data transfers has been added in this release. This experimental framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. See `globus-use-experimental-async-framework` under [Feature Flags](https://dataverse-guide--10781.org.readthedocs.build/en/10781/installation/config.html#feature-flags) and [dataverse.files.globus-monitoring-server](https://dataverse-guide--10781.org.readthedocs.build/en/10781/installation/config.html#dataverse-files-globus-monitoring-server) in the Installation Guide. See also #10623 and #10781. diff --git a/doc/release-notes/10632-DataCiteXMLandRelationType.md b/doc/release-notes/10632-DataCiteXMLandRelationType.md deleted file mode 100644 index 42c1cfb6eda..00000000000 --- a/doc/release-notes/10632-DataCiteXMLandRelationType.md +++ /dev/null @@ -1,41 +0,0 @@ -### Enhanced DataCite Metadata, Relation Type - -A new field has been added to the citation metadatablock to allow entry of the "Relation Type" between a "Related Publication" and a dataset. 
The Relation Type is currently limited to the most common 6 values recommended by DataCite: isCitedBy, Cites, IsSupplementTo, IsSupplementedBy, IsReferencedBy, and References. For existing datasets where no "Relation Type" has been specified, "IsSupplementTo" is assumed. - -Dataverse now supports the DataCite v4.5 schema. Additional metadata, including metadata about Related Publications, and files in the dataset are now being sent to DataCite and improvements to how PIDs (ORCID, ROR, DOIs, etc.), license/terms, geospatial, and other metadata is represented have been made. The enhanced metadata will automatically be sent when datasets are created and published and is available in the DataCite XML export after publication. - -The additions are in rough alignment with the OpenAIRE XML export, but there are some minor differences in addition to the Relation Type addition, including an update to the DataCite 4.5 schema. For details see https://github.com/IQSS/dataverse/pull/10632 and https://github.com/IQSS/dataverse/pull/10615 and the [design document](https://docs.google.com/document/d/1JzDo9UOIy9dVvaHvtIbOI8tFU6bWdfDfuQvWWpC0tkA/edit?usp=sharing) referenced there. 
- -Multiple backward incompatible changes and bug fixes have been made to API calls (3 of the four of which were not documented) related to updating PID target urls and metadata at the provider service: -- [Update Target URL for a Published Dataset at the PID provider](https://guides.dataverse.org/en/latest/admin/dataverses-datasets.html#update-target-url-for-a-published-dataset-at-the-pid-provider) -- [Update Target URL for all Published Datasets at the PID provider](https://guides.dataverse.org/en/latest/admin/dataverses-datasets.html#update-target-url-for-all-published-datasets-at-the-pid-provider) -- [Update Metadata for a Published Dataset at the PID provider](https://guides.dataverse.org/en/latest/admin/dataverses-datasets.html#update-metadata-for-a-published-dataset-at-the-pid-provider) -- [Update Metadata for all Published Datasets at the PID provider](https://guides.dataverse.org/en/latest/admin/dataverses-datasets.html#update-metadata-for-all-published-datasets-at-the-pid-provider) - -Upgrade instructions --------------------- - -The Solr schema has to be updated via the normal mechanism to add the new "relationType" field. - -The citation metadatablock has to be reinstalled using the standard instructions. - -With these two changes, the "Relation Type" fields will be available and creation/publication of datasets will result in the expanded XML being sent to DataCite. - -To update existing datasets (and files using DataCite DOIs): - -Exports can be updated by running `curl http://localhost:8080/api/admin/metadata/reExportAll` - -Entries at DataCite for published datasets can be updated by a superuser using an API call (newly documented): - -`curl -X POST -H 'X-Dataverse-key:' http://localhost:8080/api/datasets/modifyRegistrationPIDMetadataAll` - -This will loop through all published datasets (and released files with PIDs). As long as the loop completes, the call will return a 200/OK response. 
Any PIDs for which the update fails can be found using - -`grep 'Failure for id' server.log` - -Failures may occur if PIDs were never registered, or if they were never made findable. Any such cases can be fixed manually in DataCite Fabrica or using the [Reserve a PID](https://guides.dataverse.org/en/latest/api/native-api.html#reserve-a-pid) API call and the newly documented `/api/datasets//modifyRegistration` call respectively. See https://guides.dataverse.org/en/latest/admin/dataverses-datasets.html#send-dataset-metadata-to-pid-provider. Please reach out with any questions. - -PIDs can also be updated by a superuser on a per-dataset basis using - -`curl -X POST -H 'X-Dataverse-key:' http://localhost:8080/api/datasets//modifyRegistrationMetadata` - diff --git a/doc/release-notes/10633-add-dataverse-api-extension.md b/doc/release-notes/10633-add-dataverse-api-extension.md deleted file mode 100644 index f5d8030e8ac..00000000000 --- a/doc/release-notes/10633-add-dataverse-api-extension.md +++ /dev/null @@ -1 +0,0 @@ -The addDataverse (/api/dataverses/{identifier}) API endpoint has been extended to allow adding metadata blocks, input levels and facet ids at creation time, as the Dataverse page in create mode does in JSF. diff --git a/doc/release-notes/10697-improve-permission-indexing.md b/doc/release-notes/10697-improve-permission-indexing.md new file mode 100644 index 00000000000..b232b1c4d3c --- /dev/null +++ b/doc/release-notes/10697-improve-permission-indexing.md @@ -0,0 +1,7 @@ +### Reindexing after a role assignment is less memory intensive + +Adding/removing a user from a role on a collection, particularly the root collection, could lead to a significant increase in memory use resulting in Dataverse itself failing with an out-of-memory condition. Such changes now consume much less memory. 
+ +If you have experienced out-of-memory failures in Dataverse in the past that could have been caused by this problem, you may wish to run a [reindex in place](https://guides.dataverse.org/en/latest/admin/solr-search-index.html#reindex-in-place) to update any out-of-date information. + +For more information, see #10697 and #10698. diff --git a/doc/release-notes/10708 - MDC Citation and DOI parsing improvements.md b/doc/release-notes/10708 - MDC Citation and DOI parsing improvements.md new file mode 100644 index 00000000000..86c1bb14d32 --- /dev/null +++ b/doc/release-notes/10708 - MDC Citation and DOI parsing improvements.md @@ -0,0 +1,3 @@ +MDC Citation retrieval with the PID settings has been fixed. +PID parsing in Dataverse is now case insensitive, improving interaction with services that may change the case of PIDs. +Warnings related to managed/excluded PID lists for PID providers have been reduced diff --git a/doc/release-notes/10711-CVoc Updates.md b/doc/release-notes/10711-CVoc Updates.md deleted file mode 100644 index f747bedb049..00000000000 --- a/doc/release-notes/10711-CVoc Updates.md +++ /dev/null @@ -1 +0,0 @@ -Changes in Dataverse and updates to the ORCID and ROR external vocabulary scripts support deploying these for the citation block author field (and others). diff --git a/doc/release-notes/10726-dataverse-facets-api-extension.md b/doc/release-notes/10726-dataverse-facets-api-extension.md deleted file mode 100644 index baf6f798e35..00000000000 --- a/doc/release-notes/10726-dataverse-facets-api-extension.md +++ /dev/null @@ -1,3 +0,0 @@ -New optional query parameter "returnDetails" added to "dataverses/{identifier}/facets/" endpoint to include detailed information of each DataverseFacet. - -New endpoint "datasetfields/facetables" that lists all facetable dataset fields defined in the installation. 
diff --git a/doc/release-notes/10733-add-publication-status-to-search-api-results.md b/doc/release-notes/10733-add-publication-status-to-search-api-results.md deleted file mode 100644 index d015a50a00d..00000000000 --- a/doc/release-notes/10733-add-publication-status-to-search-api-results.md +++ /dev/null @@ -1,14 +0,0 @@ -Search API (/api/search) response will now include publicationStatuses in the Json response as long as the list is not empty - -Example: -```javascript -"items": [ - { - "name": "Darwin's Finches", - ... - "publicationStatuses": [ - "Unpublished", - "Draft" - ], -(etc, etc) -``` diff --git a/doc/release-notes/10741-list-metadatablocks-display-on-create-fix.md b/doc/release-notes/10741-list-metadatablocks-display-on-create-fix.md deleted file mode 100644 index 4edadcaa1fc..00000000000 --- a/doc/release-notes/10741-list-metadatablocks-display-on-create-fix.md +++ /dev/null @@ -1 +0,0 @@ -Fixed dataverses/{identifier}/metadatablocks endpoint to not return fields marked as displayOnCreate=true if there is an input level with include=false, when query parameters returnDatasetFieldTypes=true and onlyDisplayedOnCreate=true are set. 
diff --git a/doc/release-notes/10742-newest-oldest-sort-order-backwards.md b/doc/release-notes/10742-newest-oldest-sort-order-backwards.md new file mode 100644 index 00000000000..0afaf45449d --- /dev/null +++ b/doc/release-notes/10742-newest-oldest-sort-order-backwards.md @@ -0,0 +1,3 @@ +## Minor bug fix to UI to fix the order of the files on the Dataset Files page when ordering by Date + +A fix was made to the UI to correct the 'Newest' and 'Oldest' sort orders, which were reversed. diff --git a/doc/release-notes/10744-ro-crate-docs.md b/doc/release-notes/10744-ro-crate-docs.md deleted file mode 100644 index 9d52b4578b4..00000000000 --- a/doc/release-notes/10744-ro-crate-docs.md +++ /dev/null @@ -1,3 +0,0 @@ -## RO-Crate Support (Metadata Export) - -Dataverse now supports [RO-Crate](https://www.researchobject.org/ro-crate/) in the sense that dataset metadata can be exported in that format. This functionality is not available out of the box but you can enable one or more RO-Crate exporters from the [list of external exporters](https://preview.guides.gdcc.io/en/develop/installation/advanced.html#inventory-of-external-exporters). See also #10744. diff --git a/doc/release-notes/10749-dataverse-user-permissions-api-extension.md b/doc/release-notes/10749-dataverse-user-permissions-api-extension.md deleted file mode 100644 index 706b1f42641..00000000000 --- a/doc/release-notes/10749-dataverse-user-permissions-api-extension.md +++ /dev/null @@ -1 +0,0 @@ -New API endpoint "dataverses/{identifier}/userPermissions" for obtaining the user permissions on a dataverse. 
diff --git a/doc/release-notes/10758-rust-client.md b/doc/release-notes/10758-rust-client.md deleted file mode 100644 index e206f27ce65..00000000000 --- a/doc/release-notes/10758-rust-client.md +++ /dev/null @@ -1,3 +0,0 @@ -### Rust API client library - -An API client library for the Rust programming language is now available at https://github.com/gdcc/rust-dataverse and has been added to the [list of client libraries](https://dataverse-guide--10758.org.readthedocs.build/en/10758/api/client-libraries.html) in the API Guide. See also #10758. diff --git a/doc/release-notes/10772-fix-importDDI-otherId.md b/doc/release-notes/10772-fix-importDDI-otherId.md new file mode 100644 index 00000000000..d5a9018b2b2 --- /dev/null +++ b/doc/release-notes/10772-fix-importDDI-otherId.md @@ -0,0 +1,2 @@ +Bug Fix: +This PR fixes the `edu.harvard.iq.dataverse.util.json.JsonParseException: incorrect multiple for field otherId` error when DDI harvested data contains multiple otherId values. \ No newline at end of file diff --git a/doc/release-notes/10797-update-current-version-bug-fix.md b/doc/release-notes/10797-update-current-version-bug-fix.md deleted file mode 100644 index 2cfaf69cad3..00000000000 --- a/doc/release-notes/10797-update-current-version-bug-fix.md +++ /dev/null @@ -1,11 +0,0 @@ -A significant bug in the superuser-only "Update-Current-Version" publication was found and fixed in this release. If the Update-Current-Version option was used when changes were made to the dataset Terms (rather than to dataset metadata), or if the PID provider service was down/returned an error, the update would fail and render the dataset unusable and require restoration from a backup. The fix in this release allows the update to succeed in both of these cases and redesigns the functionality such that any unknown issues should not make the dataset unusable (i.e. 
the error would be reported and the dataset would remain in its current state with the last-published version as it was and changes still in the draft version.) - -Users of earlier Dataverse releases are encouraged to alert their superusers to this issue. Those who wish to disable this functionality have two options: -* Change the dataset.updateRelease entry in the Bundle.properties file (or local language version) to "Do Not Use" or similar (doesn't disable but alerts superusers to the issue), or -* Edit the dataset.xhtml file to remove the lines - - - - - -, delete the contents of the generated and osgi-cache directories in the Dataverse Payara domain, and restart the Payara server. diff --git a/doc/release-notes/10800-add-dataverse-request-json-fix.md b/doc/release-notes/10800-add-dataverse-request-json-fix.md deleted file mode 100644 index ddd6c388ec6..00000000000 --- a/doc/release-notes/10800-add-dataverse-request-json-fix.md +++ /dev/null @@ -1 +0,0 @@ -Fixed the "addDataverse" API endpoint (/dataverses/{id} POST) expected request JSON structure to parse facetIds as described in the docs. \ No newline at end of file diff --git a/doc/release-notes/10810-search-api-payload-extensions.md b/doc/release-notes/10810-search-api-payload-extensions.md deleted file mode 100644 index 5112d9f62ee..00000000000 --- a/doc/release-notes/10810-search-api-payload-extensions.md +++ /dev/null @@ -1,52 +0,0 @@ -Search API (/api/search) response will now include new fields for the different entities. - -For Dataverse: - -- "affiliation" -- "parentDataverseName" -- "parentDataverseIdentifier" -- "image_url" (optional) - -```javascript -"items": [ - { - "name": "Darwin's Finches", - ... - "affiliation": "Dataverse.org", - "parentDataverseName": "Root", - "parentDataverseIdentifier": "root", - "image_url":"..." -(etc, etc) -``` - -For DataFile: - -- "releaseOrCreateDate" -- "image_url" (optional) - -```javascript -"items": [ - { - "name": "test.txt", - ... 
- "releaseOrCreateDate": "2016-05-10T12:53:39Z", - "image_url":"..." -(etc, etc) -``` - -For Dataset: - -- "image_url" (optional) - -```javascript -"items": [ - { - ... - "image_url": "http://localhost:8080/api/datasets/2/logo" - ... -(etc, etc) -``` - -The image_url field was already part of the SolrSearchResult JSON (and incorrectly appeared in Search API documentation), but it wasn’t returned by the API because it was appended only after the Solr query was executed in the SearchIncludeFragment of JSF. Now, the field is set in SearchServiceBean, ensuring it is always returned by the API when an image is available. - -The schema.xml file for Solr has been updated to include a new field called dvParentAlias for supporting the new response field "parentDataverseIdentifier". So for the next Dataverse released version, a Solr reindex will be necessary to apply the new schema.xml version. diff --git a/doc/release-notes/10819-publish-thumbnail-bug.md b/doc/release-notes/10819-publish-thumbnail-bug.md deleted file mode 100644 index 46c9875a6ef..00000000000 --- a/doc/release-notes/10819-publish-thumbnail-bug.md +++ /dev/null @@ -1,6 +0,0 @@ -The initial release of the Dataverse v6.3 introduced a bug where publishing would break the dataset thumbnail, which in turn broke the rendering of the parent Collection ("dataverse") page. This problem was fixed in the PR 10820. - -This bug fix will prevent this from happening in the future, but does not fix any existing broken links. To restore any broken thumbnails caused by this bug, you can call the http://localhost:8080/api/admin/clearThumbnailFailureFlag API, which will attempt to clear the flag on all files (regardless of whether caused by this bug or some other problem with the file) or the http://localhost:8080/api/admin/clearThumbnailFailureFlag/id to clear the flag for individual files. Calling the former, batch API is recommended. 
- -Additionally, the same PR made it possible to turn off the feature that automatically selects of one of the image datafiles to serve as the thumbnail of the parent dataset. An admin can turn it off by raising the feature flag `-Ddataverse.feature.disable-dataset-thumbnail-autoselect=true`. When the feature is disabled, a user can still manually pick a thumbnail image, or upload a dedicated thumbnail image. - diff --git a/doc/release-notes/10831-standardize-image-url-of-search-api.md b/doc/release-notes/10831-standardize-image-url-of-search-api.md deleted file mode 100644 index 1910091455c..00000000000 --- a/doc/release-notes/10831-standardize-image-url-of-search-api.md +++ /dev/null @@ -1,28 +0,0 @@ -Search API (/api/search) response will now include new image_url format for the Datafile and Dataverse logo. -Note to release note writer: this supersedes the release note 10810-search-api-payload-extensions.md - -For Dataverse: - -- "image_url" (optional) - -```javascript -"items": [ - { - "name": "Darwin's Finches", - ... - "image_url":"/api/access/dvCardImage/{identifier}" -(etc, etc) -``` - -For DataFile: - -- "image_url" (optional) - -```javascript -"items": [ - { - "name": "test.txt", - ... - "image_url":"/api/access/datafile/{identifier}?imageThumb=true" -(etc, etc) -``` diff --git a/doc/release-notes/10857-add-expiration-date-to-recreate-token-api.md b/doc/release-notes/10857-add-expiration-date-to-recreate-token-api.md new file mode 100644 index 00000000000..b450867c630 --- /dev/null +++ b/doc/release-notes/10857-add-expiration-date-to-recreate-token-api.md @@ -0,0 +1 @@ +An optional query parameter called 'returnExpiration' has been added to the 'users/token/recreate' endpoint, which, if set to true, returns the expiration time in the response message. 
diff --git a/doc/release-notes/10869-fix-npe-using-cvoc.md b/doc/release-notes/10869-fix-npe-using-cvoc.md deleted file mode 100644 index 53214d3789d..00000000000 --- a/doc/release-notes/10869-fix-npe-using-cvoc.md +++ /dev/null @@ -1 +0,0 @@ -This release fixes a bug in the external controlled vocabulary mechanism (introduced in v6.3) that could cause indexing to fail when a script is configured for one child field and no other child fields were managed. \ No newline at end of file diff --git a/doc/release-notes/10886-update-to-conditions-to-display-image_url.md b/doc/release-notes/10886-update-to-conditions-to-display-image_url.md new file mode 100644 index 00000000000..6dfe8eb9f2d --- /dev/null +++ b/doc/release-notes/10886-update-to-conditions-to-display-image_url.md @@ -0,0 +1,8 @@ +Search API (/api/search) responses for Datafiles include image_url for the thumbnail if each of the following are true: +1. The DataFile is not Harvested +2. A Thumbnail is available for the Datafile +3. If the Datafile is Restricted then the caller must have Download File Permission for the Datafile +4. The Datafile is NOT actively embargoed +5. The Datafile's retention period has NOT expired + +See also #10875 and #10886. diff --git a/doc/release-notes/10889_bump_PG17_FlyWay10.md b/doc/release-notes/10889_bump_PG17_FlyWay10.md new file mode 100644 index 00000000000..932c06fbc3d --- /dev/null +++ b/doc/release-notes/10889_bump_PG17_FlyWay10.md @@ -0,0 +1,7 @@ +This release bumps both the Postgres JDBC driver and Flyway versions. This should better support Postgres version 17, and as of version 10 Flyway no longer requires a paid subscription to support older versions of Postgres. + +While we don't encourage the use of older Postgres versions, this flexibility may benefit some of our long-standing installations in their upgrade paths. Postgres 13 remains the version used with automated testing. 
+ +As part of this update, the containerized development environment now uses Postgres 17 instead of 16. Developers must delete their data (`rm -rf docker-dev-volumes`) and start with an empty database. They can rerun the quickstart in the dev guide. + +The Docker compose file used for [evaluations or demos](https://dataverse-guide--10912.org.readthedocs.build/en/10912/container/running/demo.html) has been upgraded from Postgres 13 to 17. diff --git a/doc/release-notes/10901deaccessioned file edit fix.md b/doc/release-notes/10901deaccessioned file edit fix.md new file mode 100644 index 00000000000..db12b1fc978 --- /dev/null +++ b/doc/release-notes/10901deaccessioned file edit fix.md @@ -0,0 +1 @@ +A bug has been fixed that caused an error when trying to update the files of a dataset whose only previous version had been deaccessioned. \ No newline at end of file diff --git a/doc/release-notes/10904-edit-dataverse-collection-endpoint.md b/doc/release-notes/10904-edit-dataverse-collection-endpoint.md new file mode 100644 index 00000000000..b9256941eea --- /dev/null +++ b/doc/release-notes/10904-edit-dataverse-collection-endpoint.md @@ -0,0 +1 @@ +Adds a new endpoint (`PUT /api/dataverses/`) for updating an existing Dataverse collection using a JSON file following the same structure as the one used in the API for the creation. diff --git a/doc/release-notes/10914-users-token-api-credentials.md b/doc/release-notes/10914-users-token-api-credentials.md new file mode 100644 index 00000000000..888214481f6 --- /dev/null +++ b/doc/release-notes/10914-users-token-api-credentials.md @@ -0,0 +1,3 @@ +Extended the users/token GET endpoint to support any auth mechanism for retrieving the token information. + +Previously, this endpoint only accepted an API token to retrieve its information. Now, it accepts any authentication mechanism and returns the associated API token information. 
diff --git a/doc/release-notes/10919-minor-DataCiteXML-bugfix.md b/doc/release-notes/10919-minor-DataCiteXML-bugfix.md new file mode 100644 index 00000000000..4fa0c1142b1 --- /dev/null +++ b/doc/release-notes/10919-minor-DataCiteXML-bugfix.md @@ -0,0 +1 @@ +A minor bug fix was made to avoid sending a useless ", null" in the DataCiteXML sent to DataCite and in the DataCite export when a dataset has a metadata entry for "Software Name" and no entry for "Software Version". The bug fix will update datasets upon publication. Existing published datasets with this problem can be fixed by [pushing updated metadata to DataCite for affected datasets](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#update-metadata-for-a-published-dataset-at-the-pid-provider) and [re-exporting the dataset metadata](https://guides.dataverse.org/en/6.4/admin/metadataexport.html#batch-exports-through-the-api) or by following steps 9 and 10 in the v6.4 release notes to update and re-export all datasets. diff --git a/doc/release-notes/10939-i18n-docker.md b/doc/release-notes/10939-i18n-docker.md new file mode 100644 index 00000000000..d9887b684db --- /dev/null +++ b/doc/release-notes/10939-i18n-docker.md @@ -0,0 +1,5 @@ +## Multiple Languages in Docker + +Configuration and documentation have been added to explain how to set up multiple languages (e.g. English and French) in the tutorial for setting up Dataverse in Docker. + +See also #10939. diff --git a/doc/release-notes/10947-unpublished-files-appearing-in-search-results-for-anon-user.md b/doc/release-notes/10947-unpublished-files-appearing-in-search-results-for-anon-user.md new file mode 100644 index 00000000000..66ea04b124f --- /dev/null +++ b/doc/release-notes/10947-unpublished-files-appearing-in-search-results-for-anon-user.md @@ -0,0 +1,11 @@ +## Unpublished file bug fix + +A bug fix was made that gets the major version of a Dataset when all major versions were deaccessioned. 
This fixes the incorrect showing of the files as "Unpublished" in the search list even when they are published. +This fix affects the indexing, meaning these datasets must be re-indexed once Dataverse is updated. This can be manually done by calling the index API for each affected Dataset. + +Example: +```shell +curl http://localhost:8080/api/admin/index/dataset?persistentId=doi:10.7910/DVN/6X4ZZL +``` + +See also #10947 and #10974. diff --git a/doc/release-notes/10969-order-subfields-version-difference.md b/doc/release-notes/10969-order-subfields-version-difference.md new file mode 100644 index 00000000000..3f245ebe069 --- /dev/null +++ b/doc/release-notes/10969-order-subfields-version-difference.md @@ -0,0 +1,2 @@ +Bug Fix: +In order to facilitate the comparison between the draft version and the published version of a dataset, a sort on subfields has been added (#10969) \ No newline at end of file diff --git a/doc/release-notes/11012-get-dataverse-api-ext.md b/doc/release-notes/11012-get-dataverse-api-ext.md new file mode 100644 index 00000000000..641aa373174 --- /dev/null +++ b/doc/release-notes/11012-get-dataverse-api-ext.md @@ -0,0 +1 @@ +The JSON payload of the getDataverse endpoint has been extended to include properties isMetadataBlockRoot and isFacetRoot. diff --git a/doc/release-notes/6.4-release-notes.md b/doc/release-notes/6.4-release-notes.md new file mode 100644 index 00000000000..979fd16bf9e --- /dev/null +++ b/doc/release-notes/6.4-release-notes.md @@ -0,0 +1,526 @@ +# Dataverse 6.4 + +Please note: To read these instructions in full, please go to https://github.com/IQSS/dataverse/releases/tag/v6.4 rather than the list of releases, which will cut them off. + +This release brings new features, enhancements, and bug fixes to Dataverse. Thank you to all of the community members who contributed code, suggestions, bug reports, and other assistance across the project. 
+ +## Release Highlights + +New features in Dataverse 6.4: + +- Enhanced DataCite Metadata, including "Relation Type" +- All ISO 639-3 languages are now supported +- There is now a button for "Unlink Dataset" +- Users will have DOIs/PIDs reserved for their files as part of file upload instead of at publication time +- Datasets can now have types such as "software" or "workflow" +- Croissant support +- RO-Crate support +- and more! Please see below. + +New client library: + +- Rust + +This release also fixes two important bugs described below and in [a post](https://groups.google.com/g/dataverse-community/c/evn5C-pyrS8/m/JrH9vp47DwAJ) on the mailing list: + +- "Update Current Version" can cause metadata loss +- Publishing breaks designated dataset thumbnail, messes up collection page + +Additional details on the above as well as many more features and bug fixes included in the release are described below. Read on! + +## Features Added + +### Enhanced DataCite Metadata, Including "Relation Type" + +Within the "Related Publication" field, a new subfield has been added called "Relation Type" that allows for the most common [values](https://datacite-metadata-schema.readthedocs.io/en/4.5/appendices/appendix-1/relationType/) recommended by DataCite: isCitedBy, Cites, IsSupplementTo, IsSupplementedBy, IsReferencedBy, and References. For existing datasets where no "Relation Type" has been specified, "IsSupplementTo" is assumed. + +Dataverse now supports the [DataCite v4.5 schema](http://schema.datacite.org/meta/kernel-4/). Additional metadata is now being sent to DataCite including metadata about related publications and files in the dataset. Improved metadata is being sent including how PIDs (ORCID, ROR, DOIs, etc.), license/terms, geospatial, and other metadata are represented. The enhanced metadata will automatically be sent to DataCite when datasets are created and published. 
Additionally, after publication, you can inspect what was sent by looking at the DataCite XML export. + +The additions are in rough alignment with the OpenAIRE XML export, but there are some minor differences in addition to the Relation Type addition, including an update to the DataCite 4.5 schema. For details see #10632, #10615 and the [design document](https://docs.google.com/document/d/1JzDo9UOIy9dVvaHvtIbOI8tFU6bWdfDfuQvWWpC0tkA/edit?usp=sharing) referenced there. + +Multiple backward incompatible changes and bug fixes have been made to API calls (three of four of which were not documented) related to updating PID target URLs and metadata at the provider service: +- [Update Target URL for a Published Dataset at the PID provider](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#update-target-url-for-a-published-dataset-at-the-pid-provider) +- [Update Target URL for all Published Datasets at the PID provider](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#update-target-url-for-all-published-datasets-at-the-pid-provider) +- [Update Metadata for a Published Dataset at the PID provider](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#update-metadata-for-a-published-dataset-at-the-pid-provider) +- [Update Metadata for all Published Datasets at the PID provider](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#update-metadata-for-all-published-datasets-at-the-pid-provider) + +### Full List of ISO 639-3 Languages Now Supported + +The controlled vocabulary values list for the metadata field "Language" in the citation block has now been extended to include roughly 7920 ISO 639-3 values. + +Some of the language entries in the pre-6.4 list correspond to "macro languages" in ISO-639-3 and admins/users may wish to update to use the corresponding individual language entries from ISO-639-3. 
As these cases are expected to be rare (they do not involve major world languages), finding them is not covered in the release notes. Anyone who desires help in this area is encouraged to reach out to the Dataverse community via any of the standard communication channels. + +ISO 639-3 codes were downloaded from [sil.org](https://iso639-3.sil.org/code_tables/download_tables#Complete%20Code%20Tables:~:text=iso%2D639%2D3_Code_Tables_20240415.zip) and the file used for merging with the existing citation.tsv was "iso-639-3.tab". See also #8578 and #10762. + +### Unlink Dataset Button + +A new "Unlink Dataset" button has been added to the dataset page to allow a user to unlink a dataset from a collection. To unlink a dataset the user must have permission to link the dataset. Additionally, the [existing API](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#unlink-a-dataset) for unlinking datasets has been updated to no longer require superuser access as the "Publish Dataset" permission is now enough. See also #10583 and #10689. + +### Pre-Publish File DOI Reservation + +Dataverse installations using DataCite as a persistent identifier (PID) provider (or other providers that support reserving PIDs) will be able to reserve PIDs for files when they are uploaded (rather than at publication time). Note that reserving file DOIs can slow uploads with large numbers of files so administrators may need to adjust timeouts (specifically any Apache "``ProxyPass / ajp://localhost:8009/ timeout=``" setting in the recommended Dataverse configuration). + +### Initial Support for Dataset Types + +Out of the box, all datasets now have the type "dataset" but superusers can add additional types. At this time the type of a dataset can only be set at creation time via API. The types "dataset", "software", and "workflow" (just those three, for now) will be sent to DataCite (as `resourceTypeGeneral`) when the dataset is published. 
+ +For details see [the guides](https://guides.dataverse.org/en/6.4/user/dataset-management.html#dataset-types), #10517 and #10694. Please note that this feature is highly experimental and is expected to [evolve](https://github.com/IQSS/dataverse-pm/issues/307). + +### Croissant Support (Metadata Export) + +A new metadata export format called [Croissant](https://github.com/mlcommons/croissant) is now available as an external metadata exporter. It is oriented toward making datasets consumable by machine learning. + +For more about the Croissant exporter, including installation instructions, see <https://github.com/gdcc/exporter-croissant>. See also #10341, #10533, and [discussion](https://groups.google.com/g/dataverse-community/c/JI8HPgGarr8/m/DqEIkiwlAgAJ) on the mailing list. + +Please note: the Croissant exporter works best with Dataverse 6.2 and higher (where it updates the content of `<head>` as [described](https://guides.dataverse.org/en/6.4/admin/discoverability.html#schema-org-head) in the guides) but can be used with 6.0 and higher (to get the export functionality). + +### RO-Crate Support (Metadata Export) + +Dataverse now supports [RO-Crate](https://www.researchobject.org/ro-crate/) as a metadata export format. This functionality is not available out of the box, but you can enable one or more RO-Crate exporters from the [list of external exporters](https://guides.dataverse.org/en/6.4/installation/advanced.html#inventory-of-external-exporters). See also #10744 and #10796. + +### Rust API Client Library + +A Dataverse API client library for the Rust programming language is now available at https://github.com/gdcc/rust-dataverse and has been added to the [list of client libraries](https://guides.dataverse.org/en/6.4/api/client-libraries.html) in the API Guide. See also #10758. + +### Collection Thumbnail Logo for Featured Collections + +Collections can now have a thumbnail logo that is displayed when the collection is configured as a featured collection. If present, this thumbnail logo is shown. 
Otherwise, the collection logo is shown. Configuration is done under the "Theme" for a collection as explained in [the guides](https://guides.dataverse.org/en/6.4/user/dataverse-management.html#theme). See also #10291 and #10433. + +### Saved Searches Can Be Deleted + +Saved searches can now be deleted via API. See the [Saved Search](https://guides.dataverse.org/en/6.4/api/native-api.html#saved-search) section of the API Guide, #9317 and #10198. + +### Notification Email Improvement + +When notification emails are sent the part of the closing that says "contact us for support at" will now show the support email address (`dataverse.mail.support-email`), when configured, instead of the default system email address. Using the system email address here was particularly problematic when it was a "noreply" address. See also #10287 and #10504. + +### Ability to Disable Automatic Thumbnail Selection + +It is now possible to turn off the feature that automatically selects one of the image datafiles to serve as the thumbnail of the parent dataset. An admin can turn it off by enabling the [feature flag](https://guides.dataverse.org/en/6.4/installation/config.html#feature-flags) `dataverse.feature.disable-dataset-thumbnail-autoselect`. When the feature is disabled, a user can still manually pick a thumbnail image, or upload a dedicated thumbnail image. See also #10820. + +### More Flexible PermaLinks + +The configuration setting `dataverse.pid.*.permalink.base-url`, which is used for PermaLinks, has been updated to support greater flexibility. Previously, the string `/citation?persistentId=` was automatically appended to the configured base URL. With this update, the base URL will now be used exactly as configured, without any automatic additions. See also #10775. + +### Globus Async Framework + +A new alternative implementation of Globus polling during upload data transfers has been added in this release. 
This experimental framework does not rely on the instance staying up continuously for the duration of the transfer and saves the state information about Globus upload requests in the database. See `globus-use-experimental-async-framework` under [Feature Flags](https://guides.dataverse.org/en/6.4/installation/config.html#feature-flags) and [dataverse.files.globus-monitoring-server](https://guides.dataverse.org/en/6.4/installation/config.html#dataverse-files-globus-monitoring-server) in the Installation Guide. See also #10623 and #10781. + +### CVoc (Controlled Vocabulary): Allow ORCID and ROR to Be Used Together in Author Field + +Changes in Dataverse and updates to the ORCID and ROR external vocabulary scripts support deploying these for the citation block author field (and others). See also #10711, #10712, and . + +### Development on Windows + +New instructions have been added for developers on Windows trying to run a Dataverse development environment using Windows Subsystem for Linux (WSL). See [the guides](https://guides.dataverse.org/en/6.4/developers/windows.html), #10606, and #10608. + +### Experimental Crossref PID (DOI) Provider + +Crossref can now be used as a PID (DOI) provider, but this feature is experimental. Please provide feedback through the usual channels. See also the [guides](https://guides.dataverse.org/en/6.4/installation/config.html#crossref-specific-settings), #8581, and #10806. + +### Improved JSON Schema Validation for Datasets + +JSON Schema validation has been enhanced with checks for required and allowed child objects as well as type checking for field types including `primitive`, `compound` and `controlledVocabulary`. More user-friendly error messages help pinpoint the issues in the dataset JSON. See [Retrieve a Dataset JSON Schema for a Collection](https://guides.dataverse.org/en/6.4/api/native-api.html#retrieve-a-dataset-json-schema-for-a-collection) in the API Guide, #10169, and #10543. 
+ +### Counter Processor 1.05 Support (Make Data Count) + +Counter Processor 1.05 is now supported for use with Make Data Count. If you are running Counter Processor, you should reinstall/reconfigure it as described in the latest guides. Note that Counter Processor 1.05 requires Python 3, so you will need to follow the full Counter Processor install. Also note that if you configure the new version the same way, it will reprocess the days in the current month when it is first run. This is normal and will not affect the metrics in Dataverse. See also #10479. + +### Version Tags for Container Base Images + +With this release we introduce a detailed maintenance workflow for our container images. As output of the [Containerization Working Group](https://ct.gdcc.io), the community takes another step towards production ready containers available directly from the core project. + +The maintenance workflow regularly updates the [Container Base Image](https://guides.dataverse.org/en/6.4/container/base-image.html), which contains the operating system, Java, Payara, and tools and libraries required by the Dataverse application. Shipping these rolling releases as well as immutable revisions is the foundation for secure and reliable [Dataverse Application Container](https://guides.dataverse.org/en/6.4/container/app-image.html) images. See also #10478 and #10827. + +## Bugs Fixed + +### Update Current Version + +A significant bug in the superuser-only [Update Current Version](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#make-metadata-updates-without-changing-dataset-version) publication option was fixed. If the "Update Current Version" option was used when changes were made to the dataset terms (rather than to dataset metadata) or if the PID provider service was down or returned an error, the update would fail and render the dataset unusable and require restoration from a backup. 
The fix in this release allows the update to succeed in both of these cases and redesigns the functionality such that any unknown issues should not make the dataset unusable (i.e. the error would be reported and the dataset would remain in its current state with the last-published version as it was and changes still in the draft version.) + +If you do not plan to upgrade to Dataverse 6.4 right away, you are encouraged to alert your superusers to this issue (see [this post](https://groups.google.com/g/dataverse-community/c/evn5C-pyrS8/m/JrH9vp47DwAJ)). Here are some workarounds for pre-6.4 versions: + +* Change the "dataset.updateRelease" entry in the Bundle.properties file (or local language version) to "Do Not Use" or similar (this doesn't disable the button but alerts superusers to the issue), or +* Edit the dataset.xhtml file to remove the lines below, delete the contents of the generated and osgi-cache directories in the Dataverse Payara domain, and restart the Payara server. This will remove the "Update Current Version" option from the UI. + +``` + + + +``` + +Again, the workarounds above are only for pre-6.4 versions. The bug has been fixed in Dataverse 6.4. See also #10797. + +### Broken Thumbnails + +Dataverse 6.3 introduced a bug where publishing would break the dataset thumbnail, which in turn broke the rendering of the parent collection (dataverse) page. + +This bug has been fixed but any existing broken thumbnails must be fixed manually. See "clearThumbnailFailureFlag" in the upgrade instructions below. + +Additionally, it is now possible to turn off the feature that automatically selects one of the image datafiles to serve as the thumbnail of the parent dataset. An admin can turn it off by raising the feature flag `-Ddataverse.feature.disable-dataset-thumbnail-autoselect=true`. When the feature is disabled, a user can still manually pick a thumbnail image, or upload a dedicated thumbnail image. 
+ +See also #10819, #10820, and [the post](https://groups.google.com/g/dataverse-community/c/evn5C-pyrS8/m/JrH9vp47DwAJ) on the mailing list. + +### No License, No Terms of Use + +When datasets have neither a license nor custom terms of use, the dataset page will now indicate this. Also, these datasets will no longer be indexed as having custom terms. See also #8796, #10513, and #10614. + +### CC0 License Bug Fix + +At a high level, some datasets have been mislabeled as "Custom License" when they should have been "CC0 1.0". This has been corrected. + +In Dataverse 5.10, datasets with only "CC0 Waiver" in the "termsofuse" field were converted to "Custom License" (instead of the CC0 1.0 license) through a SQL migration script (see #10634). On deployment of Dataverse 6.4, a new SQL migration script will be run automatically to correct this, changing these datasets to CC0. You can review the script in #10634, which only affects datasets that meet all of the following conditions: + +- The existing "Terms of Use" must be equal to "This dataset is made available under a Creative Commons CC0 license with the following additional/modified terms and conditions: CC0 Waiver" (this was set in #10634). +- The following terms fields must be empty: Confidentiality Declaration, Special Permissions, Restrictions, Citation Requirements, Depositor Requirements, Conditions, and Disclaimer. +- The license ID must not be assigned. + +The script will set the license ID to that of the CC0 1.0 license and remove the contents of the "termsofuse" field. See also #9081 and #10634. + +### Remap oai_dc Export and Harvesting Format Fields: dc:type and dc:date + +The `oai_dc` export and harvesting format has had the following fields remapped: + +- dc:type was mapped to the field "Kind of Data". Now it is hard-coded to the word "Dataset". +- dc:date was mapped to the field "Production Date" when available and otherwise to "Publication Date". 
Now it is mapped to the field "Publication Date" or the field used for the citation date, if set (see [Set Citation Date Field Type for a Dataset](https://guides.dataverse.org/en/6.4/api/native-api.html#set-citation-date-field-type-for-a-dataset)). + +In order for these changes to be reflected in existing datasets, a [reexport all](https://guides.dataverse.org/en/6.4/admin/metadataexport.html#batch-exports-through-the-api) should be run (mentioned below). See #8129 and #10737. + +### Zip File No Longer Misdetected as Shapefile (Hidden Directories) + +When detecting file types, Dataverse would previously detect a zip file as a shapefile if it contained [markers of a shapefile](https://guides.dataverse.org/en/6.4/developers/geospatial.html) in hidden directories. These hidden directories are now ignored when deciding if a zip file is a shapefile or not. See also #8945 and #10627. + +### External Controlled Vocabulary + +This release fixes a bug (introduced in v6.3) in the external controlled vocabulary mechanism that could cause indexing to fail (with a NullPointerException) when a script is configured for one child field and no other child fields were managed. See also #10869 and #10870. + +### Valid JSON in Error Response + +When any `ApiBlockingFilter` policy applies to a request, the JSON in the body of the error response is now valid JSON. See also #10085. 
+ +### Docker Container Base Image Security and Compatibility + +- Switch "wait-for" to "wait4x", aligned with the Configbaker Image +- Update "jattach" to v2.2 +- Install AMD64 / ARM64 versions of tools as necessary +- Run base image as unprivileged user by default instead of `root` - this was an oversight from OpenShift changes +- Linux User, Payara Admin and Domain Master passwords: + - Print hints about default, public knowledge passwords in place for + - Enable replacing these passwords at container boot time +- Enable building with updates Temurin JRE image based on Ubuntu 24.04 LTS +- Fix entrypoint script troubles with pre- and postboot script files +- Unify location of files at CONFIG_DIR=/opt/payara/config, avoid writing to other places + +See also #10508, #10672 and #10722. + +### Cleanup of Temp Directories + +In this release we addressed an issue where copies of files uploaded via the UI were left in one specific temp directory (`.../domain1/uploads` by default). We would like to remind all the installation admins that it is strongly recommended to have some automated (and aggressive) cleanup mechanisms in place for all the temp directories used by Dataverse. 
For example, at Harvard/IQSS we have the following configuration for the PrimeFaces uploads directory above: (note that, even with this fix in place, PrimeFaces will be leaving a large number of small log files in that location) + +Instead of the default location (`.../domain1/uploads`) we use a directory on a dedicated partition, outside of the filesystem where Dataverse is installed, via the following JVM option: + +``` +-Ddataverse.files.uploads=/uploads/web +``` + +and we have a dedicated cronjob that runs every 30 minutes and deletes everything older than 2 hours in that directory: + +``` +15,45 * * * * /bin/find /uploads/web/ -mmin +119 -type f -name "upload*" -exec rm -f {} \; > /dev/null 2>&1 +``` + +### Trailing Commas in Author Name Now Permitted + +When an author name ended in a comma (e.g. `Smith,` or `Smith, `), the dataset page was broken after publishing (a "500" error page was presented to the user). The underlying issue causing the JSON-LD Schema.org output on the page to break was fixed. See #10343 and #10776. + +## API Updates + +### Search API: affiliation, parentDataverseName, image_url, etc. + +The Search API (`/api/search`) response now includes additional fields, depending on the type. + +For collections (dataverses): + +- "affiliation" +- "parentDataverseName" +- "parentDataverseIdentifier" +- "image_url" (optional) + +```javascript +"items": [ + { + "name": "Darwin's Finches", + ... + "affiliation": "Dataverse.org", + "parentDataverseName": "Root", + "parentDataverseIdentifier": "root", + "image_url":"/api/access/dvCardImage/{identifier}" +(etc, etc) +``` + +For datasets: + +- "image_url" (optional) + +```javascript +"items": [ + { + ... + "image_url": "http://localhost:8080/api/datasets/2/logo" + ... +(etc, etc) +``` + +For files: + +- "releaseOrCreateDate" +- "image_url" (optional) + +```javascript +"items": [ + { + "name": "test.png", + ... 
+ "releaseOrCreateDate": "2016-05-10T12:53:39Z", + "image_url":"/api/access/datafile/42?imageThumb=true" +(etc, etc) +``` + +These examples are also shown in the [Search API](https://guides.dataverse.org/en/6.4/api/search.html) section of the API Guide. + +The image_url field was already part of the SolrSearchResult JSON (and incorrectly appeared in Search API documentation), but it wasn't returned by the API because it was appended only after the Solr query was executed in the SearchIncludeFragment of JSF (the old/current UI framework). Now, the field is set in SearchServiceBean, ensuring it is always returned by the API when an image is available. + +The Solr schema.xml file has been updated to include a new field called "dvParentAlias" for supporting the new response field "parentDataverseIdentifier". See upgrade instructions below. + +See also #10810 and #10811. + +### Search API: publicationStatuses + +The Search API (`/api/search`) response will now include publicationStatuses in the JSON response as long as the list is not empty. + +Example: + +```javascript +"items": [ + { + "name": "Darwin's Finches", + ... + "publicationStatuses": [ + "Unpublished", + "Draft" + ], +(etc, etc) +``` + +See also #10733 and #10738. + +### Search Facet Information Exposed + +A new endpoint `/api/datasetfields/facetables` lists all facetable dataset fields defined in the installation, as described in [the guides](https://guides.dataverse.org/en/6.4/api/native-api.html#list-all-facetable-dataset-fields). + +A new optional query parameter "returnDetails" added to `/api/dataverses/{identifier}/facets/` endpoint to include detailed information of each DataverseFacet, as described in [the guides](https://guides.dataverse.org/en/6.4/api/native-api.html#list-facets-configured-for-a-dataverse-collection). See also #10726 and #10727. 
+ +### User Permissions on Collections + +A new endpoint at `/api/dataverses/{identifier}/userPermissions` for obtaining the user permissions on a collection (dataverse) has been added. See also [the guides](https://guides.dataverse.org/en/6.4/api/native-api.html#get-user-permissions-on-a-dataverse), #10749 and #10751. + +### addDataverse Extended + +The addDataverse (`/api/dataverses/{identifier}`) API endpoint has been extended to allow adding metadata blocks, input levels and facet IDs at creation time, as the Dataverse page in create mode does in JSF. See also [the guides](https://guides.dataverse.org/en/6.4/api/native-api.html#create-a-dataverse-collection), #10633 and #10644. + +### Metadata Blocks and Display on Create + +The `/api/dataverses/{identifier}/metadatablocks` endpoint has been fixed to not return fields marked as displayOnCreate=true if there is an input level with include=false, when query parameters returnDatasetFieldTypes=true and onlyDisplayedOnCreate=true are set. See also #10741 and #10767. + +The fields "depositor" and "dateOfDeposit" in the citation.tsv metadata block file have been updated to have the property "displayOnCreate" set to TRUE. In practice, only the API is affected because the UI has special logic that already shows these fields when datasets are created. See also #10850 and #10884. + +### Feature Flags Can Be Listed + +It is now possible to list all feature flags and see if they are enabled or not. See also [the guides](https://guides.dataverse.org/en/6.4/api/native-api.html#list-all-feature-flags) and #10732. 
+ +## Settings Added + +The following settings have been added: + +- dataverse.feature.disable-dataset-thumbnail-autoselect +- dataverse.feature.globus-use-experimental-async-framework +- dataverse.files.globus-monitoring-server +- dataverse.pid.*.crossref.url +- dataverse.pid.*.crossref.rest-api-url +- dataverse.pid.*.crossref.username +- dataverse.pid.*.crossref.password +- dataverse.pid.*.crossref.depositor +- dataverse.pid.*.crossref.depositor-email + +## Backward Incompatible Changes + +- The oai_dc export format has changed. See the "Remap oai_dc" section above. +- Several APIs related to DataCite have changed. See the "Enhanced DataCite Metadata" section above. + +## Complete List of Changes + +For the complete list of code changes in this release, see the [6.4 milestone](https://github.com/IQSS/dataverse/issues?q=milestone%3A6.4+is%3Aclosed) in GitHub. + +## Getting Help + +For help with upgrading, installing, or general questions please post to the [Dataverse Community Google Group](https://groups.google.com/g/dataverse-community) or email support@dataverse.org. + +## Installation + +If this is a new installation, please follow our [Installation Guide](https://guides.dataverse.org/en/latest/installation/). Please don't be shy about [asking for help](https://guides.dataverse.org/en/latest/installation/intro.html#getting-help) if you need it! + +Once you are in production, we would be delighted to update our [map of Dataverse installations](https://dataverse.org/installations) around the world to include yours! Please [create an issue](https://github.com/IQSS/dataverse-installations/issues) or email us at support@dataverse.org to join the club! + +You are also very welcome to join the [Global Dataverse Community Consortium](https://www.gdcc.io/) (GDCC). + +## Upgrade Instructions + +Upgrading requires a maintenance window and downtime. Please plan accordingly, create backups of your database, etc. 
+ +These instructions assume that you've already upgraded through all the 5.x releases and are now running Dataverse 6.3. + +0\. These instructions assume that you are upgrading from the immediate previous version. If you are running an earlier version, the only supported way to upgrade is to progress through the upgrades to all the releases in between before attempting the upgrade to this version. + +If you are running Payara as a non-root user (and you should be!), **remember not to execute the commands below as root**. Use `sudo` to change to that user first. For example, `sudo -i -u dataverse` if `dataverse` is your dedicated application user. + +In the following commands, we assume that Payara 6 is installed in `/usr/local/payara6`. If not, adjust as needed. + +```shell +export PAYARA=/usr/local/payara6 +``` + +(or `setenv PAYARA /usr/local/payara6` if you are using a `csh`-like shell) + +1\. Undeploy the previous version + +```shell +$PAYARA/bin/asadmin undeploy dataverse-6.3 +``` + +2\. Stop and start Payara + +```shell +service payara stop +sudo service payara start +``` + +3\. Deploy this version + +```shell +$PAYARA/bin/asadmin deploy dataverse-6.4.war +``` + +Note: if you have any trouble deploying, stop Payara, remove the following directories, start Payara, and try to deploy again. + +```shell +service payara stop +rm -rf $PAYARA/glassfish/domains/domain1/generated +rm -rf $PAYARA/glassfish/domains/domain1/osgi-cache +rm -rf $PAYARA/glassfish/domains/domain1/lib/databases +``` + +4\. For installations with internationalization: + +Please remember to update translations via [Dataverse language packs](https://github.com/GlobalDataverseCommunityConsortium/dataverse-language-packs). + +5\. Restart Payara + +```shell +service payara stop +service payara start +``` + +6\. Update metadata blocks + +These changes reflect incremental improvements made to the handling of core metadata fields. 
+ +```shell +wget https://raw.githubusercontent.com/IQSS/dataverse/v6.4/scripts/api/data/metadatablocks/citation.tsv + +curl http://localhost:8080/api/admin/datasetfield/load -H "Content-type: text/tab-separated-values" -X POST --upload-file citation.tsv +``` + +7\. Update Solr schema.xml file. Start with the standard v6.4 schema.xml, then, if your installation uses any custom or experimental metadata blocks, update it to include the extra fields (step 7a). + +Stop Solr (usually `service solr stop`, depending on Solr installation/OS, see the [Installation Guide](https://guides.dataverse.org/en/6.4/installation/prerequisites.html#solr-init-script)). + +```shell +service solr stop +``` + +Replace schema.xml + +```shell +wget https://raw.githubusercontent.com/IQSS/dataverse/v6.4/conf/solr/schema.xml +cp schema.xml /usr/local/solr/solr-9.4.1/server/solr/collection1/conf +``` + +Start Solr (but if you use any custom metadata blocks, perform the next step, 7a first). + +```shell +service solr start +``` + +7a\. For installations with custom or experimental metadata blocks: + +Before starting Solr, update the schema to include all the extra metadata fields that your installation uses. We do this by collecting the output of the Dataverse schema API and feeding it to the `update-fields.sh` script that we supply, as in the example below (modify the command lines as needed to reflect the names of the directories, if different): + +```shell + wget https://raw.githubusercontent.com/IQSS/dataverse/v6.4/conf/solr/update-fields.sh + chmod +x update-fields.sh + curl "http://localhost:8080/api/admin/index/solr/schema" | ./update-fields.sh /usr/local/solr/solr-9.4.1/server/solr/collection1/conf/schema.xml +``` + +Now start Solr. + +8\. Reindex Solr + +Below is the simplest way to reindex Solr: + +```shell +curl http://localhost:8080/api/admin/index +``` + +The API above rebuilds the existing index "in place". 
If you want to be absolutely sure that your index is up-to-date and consistent, you may consider wiping it clean and reindexing everything from scratch (see [the guides](https://guides.dataverse.org/en/latest/admin/solr-search-index.html)). Just note that, depending on the size of your database, a full reindex may take a while and the users will be seeing incomplete search results during that window. + +9\. Run reExportAll to update dataset metadata exports + +This step is necessary because of changes described above for the `Datacite` and `oai_dc` export formats. + +Below is the simple way to reexport all dataset metadata. For more advanced usage, please see [the guides](https://guides.dataverse.org/en/6.4/admin/metadataexport.html#batch-exports-through-the-api). + +```shell +curl http://localhost:8080/api/admin/metadata/reExportAll +``` + +10\. Pushing updated metadata to DataCite + +(If you don't use DataCite, you can skip this.) + +Above you updated the citation metadata block and Solr with the new "relationType" field. With these two changes, the "Relation Type" fields will be available and creation/publication of datasets will result in the expanded XML being sent to DataCite. You've also already run "reExportAll" to update the `Datacite` metadata export format. + +Entries at DataCite for published datasets can be updated by a superuser using an API call (newly [documented](https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#update-metadata-for-all-published-datasets-at-the-pid-provider)): + +`curl -X POST -H 'X-Dataverse-key:<key>' http://localhost:8080/api/datasets/modifyRegistrationPIDMetadataAll` + +This will loop through all published datasets (and released files with PIDs). As long as the loop completes, the call will return a 200/OK response. 
Any PIDs for which the update fails can be found using the following command: + +`grep 'Failure for id' server.log` + +Failures may occur if PIDs were never registered, or if they were never made findable. Any such cases can be fixed manually in DataCite Fabrica or using the [Reserve a PID](https://guides.dataverse.org/en/6.4/api/native-api.html#reserve-a-pid) API call and the newly documented `/api/datasets/<id>/modifyRegistration` call respectively. See https://guides.dataverse.org/en/6.4/admin/dataverses-datasets.html#send-dataset-metadata-to-pid-provider. Please reach out with any questions. + +PIDs can also be updated by a superuser on a per-dataset basis using + +`curl -X POST -H 'X-Dataverse-key:<key>' http://localhost:8080/api/datasets/<id>/modifyRegistrationMetadata` + +### Additional Upgrade Steps + +11\. If there are broken thumbnails + +To restore any broken thumbnails caused by the bug described above, you can call the `http://localhost:8080/api/admin/clearThumbnailFailureFlag` API, which will attempt to clear the flag on all files (regardless of whether caused by this bug or some other problem with the file) or the `http://localhost:8080/api/admin/clearThumbnailFailureFlag/$FILE_ID` to clear the flag for individual files. Calling the former, batch API is recommended. + +12\. PermaLinks with custom base-url + +If you currently use PermaLinks with a custom `base-url`: You must manually append `/citation?persistentId=` to the base URL to maintain functionality. + +If you use PermaLinks without a configured `base-url`, no changes are required. 
diff --git a/doc/release-notes/7068-reserve-file-pids.md b/doc/release-notes/7068-reserve-file-pids.md deleted file mode 100644 index 182a0d7f67b..00000000000 --- a/doc/release-notes/7068-reserve-file-pids.md +++ /dev/null @@ -1,9 +0,0 @@ -## Release Highlights - -### Pre-Publish File DOI Reservation with DataCite - -Dataverse installations using DataCite (or other persistent identifier (PID) Providers that support reserving PIDs) will be able to reserve PIDs for files when they are uploaded (rather than at publication time). Note that reserving file DOIs can slow uploads with large numbers of files so administrators may need to adjust timeouts (specifically any Apache "``ProxyPass / ajp://localhost:8009/ timeout=``" setting in the recommended Dataverse configuration). - -## Major Use Cases - -- Users will have DOIs/PIDs reserved for their files as part of file upload instead of at publication time. (Issue #7068, PR #7334) diff --git a/doc/release-notes/8129-harvesting.md b/doc/release-notes/8129-harvesting.md deleted file mode 100644 index 63ca8744941..00000000000 --- a/doc/release-notes/8129-harvesting.md +++ /dev/null @@ -1,18 +0,0 @@ -### Remap oai_dc export and harvesting format fields: dc:type and dc:date - -The `oai_dc` export and harvesting format has had the following fields remapped: - -- dc:type was mapped to the field "Kind of Data". Now it is hard-coded to the word "Dataset". -- dc:date was mapped to the field "Production Date" when available and otherwise to "Publication Date". Now it is mapped the field "Publication Date" or the field used for the citation date, if set (see [Set Citation Date Field Type for a Dataset](https://guides.dataverse.org/en/6.3/api/native-api.html#set-citation-date-field-type-for-a-dataset)). - -In order for these changes to be reflected in existing datasets, a [reexport all](https://guides.dataverse.org/en/6.3/admin/metadataexport.html#batch-exports-through-the-api) should be run. 
- -For more information, please see #8129 and #10737. - -### Backward incompatible changes - -See the "Remap oai_dc export" section above. - -### Upgrade instructions - -In order for changes to the `oai_dc` metadata export format to be reflected in existing datasets, a [reexport all](https://guides.dataverse.org/en/6.3/admin/metadataexport.html#batch-exports-through-the-api) should be run. diff --git a/doc/release-notes/8578-support-for-iso-639-3-languages.md b/doc/release-notes/8578-support-for-iso-639-3-languages.md deleted file mode 100644 index c702b6b8a59..00000000000 --- a/doc/release-notes/8578-support-for-iso-639-3-languages.md +++ /dev/null @@ -1,17 +0,0 @@ -The Controlled Vocabulary Values list for the metadata field Language in the Citation block has now been extended to include roughly 7920 ISO 639-3 values. -- Some of the language entries in the pre-v.6.4 list correspond to "macro languages" in ISO-639-3 and admins/users may wish to update to use the corresponding individual language entries from ISO-639-3. As these cases are expected to be rare (they do not involve major world languages), finding them is not covered in the release notes. Anyone who desires help in this area is encouraged to reach out to the Dataverse community via any of the standard communication channels. -- ISO 639-3 codes were downloaded from: -``` -https://iso639-3.sil.org/code_tables/download_tables#Complete%20Code%20Tables:~:text=iso%2D639%2D3_Code_Tables_20240415.zip -``` -- The file used for merging with the existing citation.tsv was iso-639-3.tab - -To be added to the 6.4 release instructions: - -### Additional Upgrade Steps -6\. 
Update the Citation metadata block: - -``` -- `wget https://github.com/IQSS/dataverse/releases/download/v6.4/citation.tsv` -- `curl http://localhost:8080/api/admin/datasetfield/load -X POST --data-binary @citation.tsv -H "Content-type: text/tab-separated-values"` -``` diff --git a/doc/release-notes/8581-add-crossref-pid-provider.md b/doc/release-notes/8581-add-crossref-pid-provider.md deleted file mode 100644 index 3610aa6d7cc..00000000000 --- a/doc/release-notes/8581-add-crossref-pid-provider.md +++ /dev/null @@ -1,3 +0,0 @@ -Added CrossRef DOI Pid Provider - -See Installation Configuration document for JVM Settings to enable CrossRef as a Pid Provider diff --git a/doc/release-notes/8796-fix-license-display-indexing.md b/doc/release-notes/8796-fix-license-display-indexing.md deleted file mode 100644 index ebded088875..00000000000 --- a/doc/release-notes/8796-fix-license-display-indexing.md +++ /dev/null @@ -1 +0,0 @@ -When datasets have neither a license nor custom terms of use the display will indicate this. Also, these datasets will no longer be indexed as having custom terms. diff --git a/doc/release-notes/8945-ignore-shapefiles-under-hidden-directories-in-zip.md b/doc/release-notes/8945-ignore-shapefiles-under-hidden-directories-in-zip.md deleted file mode 100644 index 145ae5f6d55..00000000000 --- a/doc/release-notes/8945-ignore-shapefiles-under-hidden-directories-in-zip.md +++ /dev/null @@ -1,5 +0,0 @@ -### Shapefile Handling will now ignore files under a hidden directory within the zip file - -Directories that are hidden will be ignored when determining if a zip file contains Shapefile files. - -For more information, see #8945. 
\ No newline at end of file diff --git a/doc/release-notes/9081-CC0-waiver-turned-into-custom-license.md b/doc/release-notes/9081-CC0-waiver-turned-into-custom-license.md deleted file mode 100644 index 042b2ec39fd..00000000000 --- a/doc/release-notes/9081-CC0-waiver-turned-into-custom-license.md +++ /dev/null @@ -1,6 +0,0 @@ -In an earlier Dataverse release, Datasets with only 'CC0 Waiver' in termsofuse field were converted to 'Custom License' instead of CC0 1.0 licenses during an automated process. A new process was added to correct this. Only Datasets with no terms other than the one create by the previous process will be modified. -- The existing 'Terms of Use' must be equal to 'This dataset is made available under a Creative Commons CC0 license with the following additional/modified terms and conditions: CC0 Waiver' -- The following terms fields must be empty: Confidentiality Declaration, Special Permissions, Restrictions, Citation Requirements, Depositor Requirements, Conditions, and Disclaimer. -- The License ID must not be assigned. - -This process will set the License ID to that of the CC0 1.0 license and remove the contents of termsofuse field. diff --git a/doc/release-notes/9317-delete-saved-search.md b/doc/release-notes/9317-delete-saved-search.md deleted file mode 100644 index 34723801036..00000000000 --- a/doc/release-notes/9317-delete-saved-search.md +++ /dev/null @@ -1,4 +0,0 @@ -### Saved search deletion - -Saved searches can now be removed using API `/api/admin/savedsearches/$id`. See PR #10198. -This is reflected in the [Saved Search Native API section](https://dataverse-guide--10198.org.readthedocs.build/en/10198/api/native-api.html#saved-search) of the Guide. 
\ No newline at end of file diff --git a/doc/release-notes/9408-fix-facets-labels-not-translated-in-result-block.md b/doc/release-notes/9408-fix-facets-labels-not-translated-in-result-block.md new file mode 100644 index 00000000000..344859e2dbd --- /dev/null +++ b/doc/release-notes/9408-fix-facets-labels-not-translated-in-result-block.md @@ -0,0 +1,7 @@ +## Fix facets filter labels not translated in result block + +On the main page, it's possible to filter results using search facets. If internationalization (i18n) has been activated in the Dataverse installation, allowing pages to be displayed in several languages, the facets are translated in the filter column. However, they aren't translated in the search results and remain in the default language, English. + +This version of Dataverse fixes this and includes internationalization in the facets visible in the search results section. + +For more information, see issue [#9408](https://github.com/IQSS/dataverse/issues/9408) and pull request [#10158](https://github.com/IQSS/dataverse/pull/10158). diff --git a/doc/release-notes/9650-5-improve-list-linked-dataverses-API.md b/doc/release-notes/9650-5-improve-list-linked-dataverses-API.md new file mode 100644 index 00000000000..8c79955891b --- /dev/null +++ b/doc/release-notes/9650-5-improve-list-linked-dataverses-API.md @@ -0,0 +1,5 @@ +The following API has been added: + +/api/datasets/{datasetId}/links + +It lists the dataverses linked to a dataset. It can be executed only by administrators. \ No newline at end of file diff --git a/doc/release-notes/api-blocking-filter-json.md b/doc/release-notes/api-blocking-filter-json.md deleted file mode 100644 index 337ff82dd8b..00000000000 --- a/doc/release-notes/api-blocking-filter-json.md +++ /dev/null @@ -1,3 +0,0 @@ -* When any `ApiBlockingFilter` policy applies to a request, the JSON in the body of the error response is now valid JSON. 
- In case an API client did any special processing to allow it to parse the body, that is no longer necessary. - The status code of such responses has not changed. diff --git a/doc/release-notes/make-data-count-.md b/doc/release-notes/make-data-count-.md deleted file mode 100644 index 9022582dddb..00000000000 --- a/doc/release-notes/make-data-count-.md +++ /dev/null @@ -1,3 +0,0 @@ -### Counter Processor 1.05 Support - -This release includes support for counter-processor-1.05 for processing Make Data Count metrics. If you are running Make Data Counts support, you should reinstall/reconfigure counter-processor as described in the latest Guides. (For existing installations, note that counter-processor-1.05 requires a Python3, so you will need to follow the full counter-processor install. Also note that if you configure the new version the same way, it will reprocess the days in the current month when it is first run. This is normal and will not affect the metrics in Dataverse.) diff --git a/doc/release-notes/permalink-base-urls.md b/doc/release-notes/permalink-base-urls.md deleted file mode 100644 index 1dd74057351..00000000000 --- a/doc/release-notes/permalink-base-urls.md +++ /dev/null @@ -1,10 +0,0 @@ -The configuration setting `dataverse.pid.*.permalink.base-url`, which is used for PermaLinks, has been updated to -support greater flexibility. Previously, the string "/citation?persistentId=" was automatically appended to the -configured base URL. With this update, the base URL will now be used exactly as configured, without any automatic -additions. - -**Upgrade instructions:** - -- If you currently use a PermaLink provider with a configured `base-url`: You must manually append - "/citation?persistentId=" to the existing base URL to maintain functionality. -- If you use a PermaLink provider without a configured `base-url`: No changes are required. 
\ No newline at end of file diff --git a/doc/sphinx-guides/source/admin/dataverses-datasets.rst b/doc/sphinx-guides/source/admin/dataverses-datasets.rst index b4d365c4fd4..7c03a6f80c0 100644 --- a/doc/sphinx-guides/source/admin/dataverses-datasets.rst +++ b/doc/sphinx-guides/source/admin/dataverses-datasets.rst @@ -129,15 +129,21 @@ Lists the link(s) created between a dataset and a Dataverse collection (see the curl -H "X-Dataverse-key: $API_TOKEN" http://$SERVER/api/datasets/$linked-dataset-id/links -It returns a list in the following format: +It returns a list in the following format (new format as of v6.4): .. code-block:: json { "status": "OK", "data": { - "dataverses that link to dataset id 56782": [ - "crc990 (id 18802)" + "id": 5, + "identifier": "FK2/OTCWMM", + "linked-dataverses": [ + { + "id": 2, + "alias": "dataverse1", + "displayName": "Lab experiments 2023 June" + } ] } } diff --git a/doc/sphinx-guides/source/admin/metadatacustomization.rst b/doc/sphinx-guides/source/admin/metadatacustomization.rst index 8752f11c1e5..e5326efebef 100644 --- a/doc/sphinx-guides/source/admin/metadatacustomization.rst +++ b/doc/sphinx-guides/source/admin/metadatacustomization.rst @@ -579,7 +579,7 @@ In general, the external vocabulary support mechanism may be a better choice for The specifics of the user interface for entering/selecting a vocabulary term and how that term is then displayed are managed by third-party Javascripts. The initial Javascripts that have been created provide auto-completion, displaying a list of choices that match what the user has typed so far, but other interfaces, such as displaying a tree of options for a hierarchical vocabulary, are possible. Similarly, existing scripts do relatively simple things for displaying a term - showing the term's name in the appropriate language and providing a link to an external URL with more information, but more sophisticated displays are possible. 
-Scripts supporting use of vocabularies from services supporting the SKOMOS protocol (see https://skosmos.org), retrieving ORCIDs (from https://orcid.org), services based on Ontoportal product (see https://ontoportal.org/), and using ROR (https://ror.org/) are available https://github.com/gdcc/dataverse-external-vocab-support. (Custom scripts can also be used and community members are encouraged to share new scripts through the dataverse-external-vocab-support repository.) +Scripts supporting use of vocabularies from services supporting the SKOSMOS protocol (see https://skosmos.org), retrieving ORCIDs (from https://orcid.org), services based on the Ontoportal product (see https://ontoportal.org/), and using ROR (https://ror.org/) are available at https://github.com/gdcc/dataverse-external-vocab-support. (Custom scripts can also be used and community members are encouraged to share new scripts through the dataverse-external-vocab-support repository.) Configuration involves specifying which fields are to be mapped, to which Solr field they should be indexed, whether free-text entries are allowed, which vocabulary(ies) should be used, what languages those vocabulary(ies) are available in, and several service protocol and service instance specific parameters, including the ability to send HTTP headers on calls to the service. These are all defined in the :ref:`:CVocConf <:CVocConf>` setting as a JSON array. Details about the required elements as well as example JSON arrays are available at https://github.com/gdcc/dataverse-external-vocab-support, along with an example metadata block that can be used for testing. 
diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index c5890fd9996..92cd4fc941b 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -12,7 +12,7 @@ v6.4 - **/api/datasets/$dataset-id/modifyRegistration**: Changed from GET to POST - **/api/datasets/modifyRegistrationPIDMetadataAll**: Changed from GET to POST - +- **/api/datasets/{identifier}/links**: The GET endpoint returns a list of Dataverses linked to the given Dataset. The format of the response has changed in v6.4, making it backward incompatible. v6.3 ---- diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 117aceb141d..6254742eebb 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -74,6 +74,58 @@ The request JSON supports an optional ``metadataBlocks`` object, with the follow To obtain an example of how these objects are included in the JSON file, download :download:`dataverse-complete-optional-params.json <../_static/api/dataverse-complete-optional-params.json>` file and modify it to suit your needs. +.. _update-dataverse-api: + +Update a Dataverse Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Updates an existing Dataverse collection using a JSON file following the same structure as the one used in the API for creation (see :ref:`create-dataverse-api`). + +The steps for updating a Dataverse collection are: + +- Prepare a JSON file containing the fields for the properties you want to update. You do not need to include all the properties, only the ones you want to update. +- Execute a curl command or equivalent. + +As an example, you can download the :download:`dataverse-complete.json <../_static/api/dataverse-complete.json>` file and modify it to suit your needs. 
The controlled vocabulary for ``dataverseType`` is the following: + +- ``DEPARTMENT`` +- ``JOURNALS`` +- ``LABORATORY`` +- ``ORGANIZATIONS_INSTITUTIONS`` +- ``RESEARCHERS`` +- ``RESEARCH_GROUP`` +- ``RESEARCH_PROJECTS`` +- ``TEACHING_COURSES`` +- ``UNCATEGORIZED`` + +The curl command below assumes you are using the name "dataverse-complete.json" and that this file is in your current working directory. + +Next you need to figure out the alias or database id of the Dataverse collection you want to update. + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export DV_ALIAS=dvAlias + + curl -H "X-Dataverse-key:$API_TOKEN" -X PUT "$SERVER_URL/api/dataverses/$DV_ALIAS" --upload-file dataverse-complete.json + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X PUT "https://demo.dataverse.org/api/dataverses/dvAlias" --upload-file dataverse-complete.json + +You should expect an HTTP 200 response and JSON beginning with "status":"OK" followed by a representation of the updated Dataverse collection. + +Same as in :ref:`create-dataverse-api`, the request JSON supports an optional ``metadataBlocks`` object, with the following supported sub-objects: + +- ``metadataBlockNames``: The names of the metadata blocks you want to add to the Dataverse collection. +- ``inputLevels``: The names of the fields in each metadata block for which you want to add a custom configuration regarding their inclusion or requirement when creating and editing datasets in the Dataverse collection. Note that if the corresponding metadata block names are not specified in the ``metadataBlockNames`` field, they will be added automatically to the Dataverse collection. +- ``facetIds``: The names of the fields to use as facets for browsing datasets and collections in the Dataverse collection. 
Note that the order of the facets is defined by their order in the provided JSON array. + +To obtain an example of how these objects are included in the JSON file, download :download:`dataverse-complete-optional-params.json <../_static/api/dataverse-complete-optional-params.json>` file and modify it to suit your needs. + .. _view-dataverse: View a Dataverse Collection @@ -887,7 +939,7 @@ Before calling the API, make sure the data files referenced by the ``POST``\ ed * This API does not cover staging files (with correct contents, checksums, sizes, etc.) in the corresponding places in the Dataverse installation's filestore. * This API endpoint does not support importing *files'* persistent identifiers. - * A Dataverse installation can import datasets with a valid PID that uses a different protocol or authority than said server is configured for. However, the server will not update the PID metadata on subsequent update and publish actions. + * A Dataverse installation can only import datasets with a valid PID that is managed by one of the PID providers that said installation is configured for. .. _import-dataset-with-type: @@ -935,7 +987,7 @@ Note that DDI XML does not have a field that corresponds to the "Subject" field .. warning:: * This API does not handle files related to the DDI file. - * A Dataverse installation can import datasets with a valid PID that uses a different protocol or authority than said server is configured for. However, the server will not update the PID metadata on subsequent update and publish actions. + * A Dataverse installation can only import datasets with a valid PID that is managed by one of the PID providers that said installation is configured for. .. _publish-dataverse-api: @@ -4412,6 +4464,12 @@ In order to obtain a new token use:: curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/users/token/recreate" +This endpoint by default will return a response message indicating the user identifier and the new token. 
+ +To also include the expiration time in the response message, the query parameter ``returnExpiration`` must be set to true:: + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/users/token/recreate?returnExpiration=true" + Delete a Token ~~~~~~~~~~~~~~ diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index c719fb05e3c..7ee355302d8 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -68,9 +68,9 @@ # built documents. # # The short X.Y version. -version = '6.3' +version = '6.4' # The full version, including alpha/beta/rc tags. -release = '6.3' +release = '6.4' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/doc/sphinx-guides/source/container/dev-usage.rst b/doc/sphinx-guides/source/container/dev-usage.rst index 6a1edcf7ebd..c02c1d4010f 100644 --- a/doc/sphinx-guides/source/container/dev-usage.rst +++ b/doc/sphinx-guides/source/container/dev-usage.rst @@ -140,6 +140,56 @@ Alternatives: - If you used Docker Compose for running, you may use ``docker compose -f docker-compose-dev.yml logs ``. Options are the same. +Accessing Harvesting Log Files +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +\1. Open a terminal and access the Dataverse container. + +Run the following command to access the Dataverse container (assuming your container is named dataverse-1): + +.. code-block:: + + docker exec -it dataverse-1 bash + +This command opens an interactive shell within the dataverse-1 container. + +\2. Navigate to the log files directory. + +Once inside the container, navigate to the directory where Dataverse logs are stored: + +.. code-block:: + + cd /opt/payara/appserver/glassfish/domains/domain1/logs + +This directory contains various log files, including those relevant to harvesting. + +\3. Create a directory for copying files. + +Create a directory where you'll copy the files you want to access on your local machine: + +.. 
code-block:: + + mkdir /dv/filesToCopy + +This will create a new folder named filesToCopy inside /dv. + +\4. Copy the files to the new directory. + +Copy all files from the current directory to the newly created filesToCopy directory: + +.. code-block:: + + cp * /dv/filesToCopy + +This command copies all files in the logs directory to /dv/filesToCopy. + +\5. Access the files on your local machine. + +On your local machine, the copied files should appear in the following directory: + +.. code-block:: + + docker-dev-volumes/app/data/filesToCopy Redeploying ----------- diff --git a/doc/sphinx-guides/source/container/running/demo.rst b/doc/sphinx-guides/source/container/running/demo.rst index f9642347558..2e404e7a09a 100644 --- a/doc/sphinx-guides/source/container/running/demo.rst +++ b/doc/sphinx-guides/source/container/running/demo.rst @@ -137,6 +137,23 @@ In the example below of configuring :ref:`:FooterCopyright` we use the default u One you make this change it should be visible in the copyright in the bottom left of every page. +Multiple Languages +++++++++++++++++++ + +Generally speaking, you'll want to follow :ref:`i18n` in the Installation Guide to set up multiple languages such as English and French. + +To set up the toggle between English and French, we'll use a slight variation on the command in the instructions above, adding the unblock key we created above: + +``curl "http://localhost:8080/api/admin/settings/:Languages?unblock-key=unblockme" -X PUT -d '[{"locale":"en","title":"English"},{"locale":"fr","title":"Français"}]'`` + +Similarly, when loading the "languages.zip" file, we'll add the unblock key: + +``curl "http://localhost:8080/api/admin/datasetfield/loadpropertyfiles?unblock-key=unblockme" -X POST --upload-file /tmp/languages/languages.zip -H "Content-Type: application/zip"`` + +Stop and start the Dataverse container in order for the language toggle to work. 
+ +Note that ``dataverse.lang.directory=/dv/lang`` has already been configured for you in the ``compose.yml`` file. The step where you loaded "languages.zip" should have populated the ``/dv/lang`` directory with files ending in ".properties". + Next Steps ---------- diff --git a/doc/sphinx-guides/source/developers/making-releases.rst b/doc/sphinx-guides/source/developers/making-releases.rst index 4936e942389..4b52b3ce922 100755 --- a/doc/sphinx-guides/source/developers/making-releases.rst +++ b/doc/sphinx-guides/source/developers/making-releases.rst @@ -10,10 +10,43 @@ Introduction This document is about releasing the main Dataverse app (https://github.com/IQSS/dataverse). See :doc:`making-library-releases` for how to release our various libraries. Other projects have their own release documentation. -The steps below describe making both regular releases and hotfix releases. - Below you'll see branches like "develop" and "master" mentioned. For more on our branching strategy, see :doc:`version-control`. +Regular or Hotfix? +------------------ + +Early on, make sure it's clear what type of release this is. The steps below describe making both regular releases and hotfix releases. + +- regular + + - e.g. 6.5 (minor) + - e.g. 7.0 (major) + +- hotfix + + - e.g. 6.4.1 (patch) + - e.g. 7.0.1 (patch) + +Ensure Issues Have Been Created +------------------------------- + +In advance of a release, GitHub issues should have been created already that capture certain steps. See https://github.com/IQSS/dataverse-pm/issues/335 for examples. + +Declare a Code Freeze +--------------------- + +The following steps are made more difficult if code is changing in the "develop" branch. Declare a code freeze until the release is out. Do not allow pull requests to be merged. + +Conduct Performance Testing +--------------------------- + +See :doc:`/qa/performance-tests` for details. + +Conduct Smoke Testing +--------------------- + +See :doc:`/qa/testing-approach` for details. + .. 
_write-release-notes: Write Release Notes @@ -23,26 +56,51 @@ Developers express the need for an addition to release notes by creating a "rele The task at or near release time is to collect these snippets into a single file. -- Create an issue in GitHub to track the work of creating release notes for the upcoming release. +- Find the issue in GitHub that tracks the work of creating release notes for the upcoming release. - Create a branch, add a .md file for the release (ex. 5.10.1 Release Notes) in ``/doc/release-notes`` and write the release notes, making sure to pull content from the release note snippets mentioned above. Snippets may not include any issue number or pull request number in the text so be sure copy the number from the filename of the snippet into the final release note. - Delete (``git rm``) the release note snippets as the content is added to the main release notes file. - Include instructions describing the steps required to upgrade the application from the previous version. These must be customized for release numbers and special circumstances such as changes to metadata blocks and infrastructure. -- Take the release notes .md through the regular Code Review and QA process. That is, make a pull request. +- Take the release notes .md through the regular Code Review and QA process. That is, make a pull request. Here's an example: https://github.com/IQSS/dataverse/pull/10866 -Create a GitHub Issue and Branch for the Release ------------------------------------------------- +Upgrade Instructions for Internal +--------------------------------- + +To upgrade internal, go to /doc/release-notes, open the release-notes.md file for the current release and perform all the steps under "Upgrade Instructions". + +Deploy Release Candidate to Demo +-------------------------------- + +First, build the release candidate. + +ssh into the dataverse-internal server and undeploy the current war file. 
+ +Go to https://jenkins.dataverse.org/job/IQSS_Dataverse_Internal/ and make the following adjustments to the config: + +- Repository URL: ``https://github.com/IQSS/dataverse.git`` +- Branch Specifier (blank for 'any'): ``*/develop`` +- Execute shell: Update version in filenames to ``dataverse-5.10.war`` (for example) + +Click "Save" then "Build Now". + +This will build the war file, and then automatically deploy it on dataverse-internal. Verify that the application has deployed successfully. + +You can scp the war file to the demo server or download it from https://jenkins.dataverse.org/job/IQSS_Dataverse_Internal/ws/target/ + +ssh into the demo server and follow the upgrade instructions in the release notes. + +Prepare Release Branch +---------------------- + +The release branch will have the final changes such as bumping the version number. Usually we branch from the "develop" branch to create the release branch. If we are creating a hotfix for a particular version (5.11, for example), we branch from the tag (e.g. ``v5.11``). -Use the GitHub issue number and the release tag for the name of the branch. (e.g. ``8583-update-version-to-v5.10.1`` +Create a release branch named after the issue that tracks bumping the version with a descriptive name like "10852-bump-to-6.4" from https://github.com/IQSS/dataverse/pull/10871. **Note:** the changes below must be the very last commits merged into the develop branch before it is merged into master and tagged for the release! Make the following changes in the release branch. -Bump Version Numbers and Prepare Container Tags ------------------------------------------------ - Increment the version number to the milestone (e.g. 5.10.1) in the following two files: - modules/dataverse-parent/pom.xml -> ```` -> ```` (e.g. 
`pom.xml commit `_) @@ -58,14 +116,11 @@ Return to the parent pom and make the following change, which is necessary for p (Before you make this change the value should be ``${parsedVersion.majorVersion}.${parsedVersion.nextMinorVersion}``. Later on, after cutting a release, we'll change it back to that value.) -Check in the Changes Above into a Release Branch and Merge It -------------------------------------------------------------- - For a regular release, make the changes above in the release branch you created, make a pull request, and merge it into the "develop" branch. Like usual, you can safely delete the branch after the merge is complete. If you are making a hotfix release, make the pull request against the "master" branch. Do not delete the branch after merging because we will later merge it into the "develop" branch to pick up the hotfix. More on this later. -Either way, as usual, you should ensure that all tests are passing. Please note that you will need to bump the version in `jenkins.yml `_ in dataverse-ansible to get the tests to pass. Consider doing this before making the pull request. Alternatively, you can bump jenkins.yml after making the pull request and re-run the Jenkins job to make sure tests pass. +Either way, as usual, you should ensure that all tests are passing. Please note that you will need to bump the version in `jenkins.yml `_ in dataverse-ansible to get the tests to pass. Consider doing this before making the pull request. Alternatively, you can bump jenkins.yml after making the pull request and re-run the Jenkins job to make sure tests pass. Merge "develop" into "master" ----------------------------- @@ -94,7 +149,7 @@ After the "master" branch has been updated and the GitHub Action to build and pu To test these images against our API test suite, go to the "alpha" workflow at https://github.com/gdcc/api-test-runner/actions/workflows/alpha.yml and run it. 
-If there are failures, additional dependencies or settings may have been added to the "develop" workflow. Copy them over and try again. +Don't be surprised if there are failures. The test runner is a work in progress! Additional dependencies or settings may have been added to the "develop" workflow. Copy them over and try again. .. _build-guides: @@ -186,11 +241,6 @@ Upload the following artifacts to the draft release you created: - metadata block tsv files - config files -Deploy on Demo --------------- - -Now that you have the release ready to go, consider giving it one final test by deploying it on https://demo.dataverse.org. Note that this is also an opportunity to re-test the upgrade checklist as described in the release note. - Publish the Release ------------------- @@ -228,7 +278,21 @@ Create a new branch (any name is fine but ``prepare-next-iteration`` is suggeste Now create a pull request and merge it. -For more background, see :ref:`base-supported-image-tags`. +For more background, see :ref:`base-supported-image-tags`. For an example, see https://github.com/IQSS/dataverse/pull/10896 + +Deploy Final Release on Demo +---------------------------- + +Above you already did the hard work of deploying a release candidate to https://demo.dataverse.org. It should be relatively straightforward to undeploy the release candidate and deploy the final release. + +Update SchemaSpy +---------------- + +We maintain SchemaSpy at URLs like https://guides.dataverse.org/en/6.3/schemaspy/index.html + +Get the attention of the core team and ask someone to update it for the new release. + +Consider updating `the thread `_ on the mailing list once the update is in place. 
Add the Release to the Dataverse Roadmap ---------------------------------------- diff --git a/doc/sphinx-guides/source/developers/version-control.rst b/doc/sphinx-guides/source/developers/version-control.rst index ecd2db6214d..fad8cac1400 100644 --- a/doc/sphinx-guides/source/developers/version-control.rst +++ b/doc/sphinx-guides/source/developers/version-control.rst @@ -291,16 +291,16 @@ By default, when a pull request is made from a fork, "Allow edits from maintaine This is a nice feature of GitHub because it means that the core dev team for the Dataverse Project can make small (or even large) changes to a pull request from a contributor to help the pull request along on its way to QA and being merged. -GitHub documents how to make changes to a fork at https://help.github.com/articles/committing-changes-to-a-pull-request-branch-created-from-a-fork/ but as of this writing the steps involve making a new clone of the repo. This works but you might find it more convenient to add a "remote" to your existing clone. The example below uses the fork at https://github.com/OdumInstitute/dataverse and the branch ``4709-postgresql_96`` but the technique can be applied to any fork and branch: +GitHub documents how to make changes to a fork at https://help.github.com/articles/committing-changes-to-a-pull-request-branch-created-from-a-fork/ but as of this writing the steps involve making a new clone of the repo. This works but you might find it more convenient to add a "remote" to your existing clone. The example below uses the fork at https://github.com/uncch-rdmc/dataverse and the branch ``4709-postgresql_96`` but the technique can be applied to any fork and branch: .. 
code-block:: bash - git remote add OdumInstitute git@github.com:OdumInstitute/dataverse.git - git fetch OdumInstitute + git remote add uncch-rdmc git@github.com:uncch-rdmc/dataverse.git + git fetch uncch-rdmc git checkout 4709-postgresql_96 vim path/to/file.txt git commit - git push OdumInstitute 4709-postgresql_96 + git push uncch-rdmc 4709-postgresql_96 .. _develop-into-develop: diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index e98ed8f5189..e3965e3cd7c 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -236,6 +236,10 @@ Dataverse automatically manages assigning PIDs and making them findable when dat allow updating the PID target URLs and metadata of already-published datasets manually if needed `, e.g. if a Dataverse instance is moved to a new URL or when the software is updated to generate additional metadata or address schema changes at the PID service. +Note that while some forms of PIDs (Handles, PermaLinks) are technically case sensitive, common practice is to avoid creating PIDs that differ only by case. +Dataverse treats PIDs of all types as case-insensitive (as DOIs are by definition). This means that Dataverse will find datasets (in search, to display dataset pages, etc.) +when the PIDs entered do not match the case of the original but will have a problem if two PIDs that differ only by case exist in one instance. + Testing PID Providers +++++++++++++++++++++ @@ -1783,7 +1787,7 @@ Now that you have a "languages.zip" file, you can load it into your Dataverse in ``curl http://localhost:8080/api/admin/datasetfield/loadpropertyfiles -X POST --upload-file /tmp/languages/languages.zip -H "Content-Type: application/zip"`` -Click on the languages using the drop down in the header to try them out. +Stop and start Payara and then click on the languages using the drop down in the header to try them out. .. 
_help-translate: diff --git a/doc/sphinx-guides/source/qa/testing-infrastructure.md b/doc/sphinx-guides/source/qa/testing-infrastructure.md index 6ec26c6da49..7f7f7c17063 100644 --- a/doc/sphinx-guides/source/qa/testing-infrastructure.md +++ b/doc/sphinx-guides/source/qa/testing-infrastructure.md @@ -31,6 +31,8 @@ To build and test a PR, we use a job called `IQSS_Dataverse_Internal` on modules > dataverse-parent > pom.xml`. Look for the version number, typically shown as `6.3`, and ensure it matches the current Dataverse build version. If it doesn't match, ask the developer to update the branch with the latest from the "develop" branch. + 1. If that didn't work, you may have run into a Flyway DB script collision error but that should be indicated by the server.log. See {doc}`/developers/sql-upgrade-scripts` in the Developer Guide. In the case of a collision, ask the developer to rename the script. 1. Assuming the above steps worked, and they should 99% of the time, test away! Note: be sure to `tail -F server.log` in a terminal window while you are doing any testing. This way you can spot problems that may not appear in the UI and have easier access to any stack traces for easier reporting. diff --git a/doc/sphinx-guides/source/style/text.rst b/doc/sphinx-guides/source/style/text.rst index 4fb2352300c..10fbd08da4a 100644 --- a/doc/sphinx-guides/source/style/text.rst +++ b/doc/sphinx-guides/source/style/text.rst @@ -9,4 +9,4 @@ Here we describe the guidelines that help us provide helpful, clear and consiste Metadata Text Guidelines ======================== -These guidelines are maintained in `a Google Doc `__ as we expect to make frequent changes to them. We welcome comments in the Google Doc. \ No newline at end of file +These guidelines are maintained in `a Google Doc `__ as we expect to make frequent changes to them. We welcome comments in the Google Doc. 
diff --git a/doc/sphinx-guides/source/versions.rst b/doc/sphinx-guides/source/versions.rst index 952eba72616..800bdc6e0f9 100755 --- a/doc/sphinx-guides/source/versions.rst +++ b/doc/sphinx-guides/source/versions.rst @@ -7,7 +7,8 @@ Dataverse Software Documentation Versions This list provides a way to refer to the documentation for previous and future versions of the Dataverse Software. In order to learn more about the updates delivered from one version to another, visit the `Releases `__ page in our GitHub repo. - pre-release `HTML (not final!) `__ and `PDF (experimental!) `__ built from the :doc:`develop ` branch :doc:`(how to contribute!) ` -- 6.3 +- 6.4 +- `6.3 `__ - `6.2 `__ - `6.1 `__ - `6.0 `__ diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 402a95c0e16..384b70b7a7b 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -57,6 +57,7 @@ services: -Ddataverse.pid.fake.label=FakeDOIProvider -Ddataverse.pid.fake.authority=10.5072 -Ddataverse.pid.fake.shoulder=FK2/ + -Ddataverse.lang.directory=/dv/lang ports: - "8080:8080" # HTTP (Dataverse Application) - "4949:4848" # HTTPS (Payara Admin Console) diff --git a/docker/compose/demo/compose.yml b/docker/compose/demo/compose.yml index 33e7b52004b..a0a85ef5217 100644 --- a/docker/compose/demo/compose.yml +++ b/docker/compose/demo/compose.yml @@ -26,6 +26,7 @@ services: -Ddataverse.pid.fake.label=FakeDOIProvider -Ddataverse.pid.fake.authority=10.5072 -Ddataverse.pid.fake.shoulder=FK2/ + -Ddataverse.lang.directory=/dv/lang ports: - "8080:8080" # HTTP (Dataverse Application) - "4848:4848" # HTTP (Payara Admin Console) @@ -76,7 +77,7 @@ services: postgres: container_name: "postgres" hostname: postgres - image: postgres:13 + image: postgres:17 restart: on-failure environment: - POSTGRES_USER=dataverse diff --git a/modules/dataverse-parent/pom.xml b/modules/dataverse-parent/pom.xml index 6bea02569ec..9442b55d622 100644 --- a/modules/dataverse-parent/pom.xml +++ 
b/modules/dataverse-parent/pom.xml @@ -131,7 +131,7 @@ - 6.3 + 6.4 17 UTF-8 @@ -149,7 +149,7 @@ 6.2024.6 - 42.7.2 + 42.7.4 9.4.1 1.12.748 26.30.0 @@ -447,7 +447,7 @@ (These properties are provided by the build-helper plugin below.) --> ${parsedVersion.majorVersion}.${parsedVersion.nextMinorVersion} - + diff --git a/pom.xml b/pom.xml index edf72067976..5ecbd7059c1 100644 --- a/pom.xml +++ b/pom.xml @@ -27,10 +27,10 @@ war 1.2.18.4 - 9.22.1 + 10.19.0 1.20.1 5.2.1 - 2.4.1 + 2.9.1 5.5.3 Dataverse API @@ -68,6 +68,17 @@ + + + org.apache.james + apache-mime4j-core + 0.8.10 + + + org.apache.james + apache-mime4j-dom + 0.8.7 + org.eclipse.persistence @@ -553,7 +569,7 @@ org.xmlunit xmlunit-core - 2.9.1 + 2.10.0 com.google.cloud @@ -615,7 +631,7 @@ org.xmlunit xmlunit-assertj3 - 2.8.2 + 2.10.0 test @@ -993,7 +1009,7 @@ true docker-build - 16 + 17 gdcc/dataverse:${app.image.tag} unstable @@ -1127,4 +1143,4 @@ - \ No newline at end of file + diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index a9ea2b9ca0e..abc09465603 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -59,8 +59,8 @@ distributorURL URL The URL of the distributor's webpage https:// url 55 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 56
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 57 TRUE FALSE FALSE TRUE FALSE FALSE citation - depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 58 FALSE FALSE FALSE FALSE FALSE FALSE citation - dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 59 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted + depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 58 FALSE FALSE FALSE FALSE TRUE FALSE citation + dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 59 FALSE FALSE FALSE TRUE TRUE FALSE citation http://purl.org/dc/terms/dateSubmitted timePeriodCovered Time Period The time period that the data refer to. Also known as span. 
This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 60 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 62 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 52cb7d6f2dc..78579b1de21 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -333,15 +333,20 @@ public DatasetVersion getLatestVersion() { return getVersions().get(0); } - public DatasetVersion getLatestVersionForCopy() { + public DatasetVersion getLatestVersionForCopy(boolean includeDeaccessioned) { for (DatasetVersion testDsv : getVersions()) { - if (testDsv.isReleased() || testDsv.isArchived()) { + if (testDsv.isReleased() || testDsv.isArchived() + || (testDsv.isDeaccessioned() && includeDeaccessioned)) { return testDsv; } } return getVersions().get(0); } + public DatasetVersion getLatestVersionForCopy(){ + return getLatestVersionForCopy(false); + } + public List getVersions() { return versions; } @@ -478,8 +483,17 @@ public Date getMostRecentMajorVersionReleaseDate() { if (this.isHarvested()) { return getVersions().get(0).getReleaseTime(); } else { + Long majorVersion = null; for (DatasetVersion version : this.getVersions()) { - if (version.isReleased() && version.getMinorVersionNumber().equals((long) 0)) { + if (version.isReleased()) { + if (version.getMinorVersionNumber().equals((long) 0)) { + return version.getReleaseTime(); + } else if (majorVersion == null) { + majorVersion = 
version.getVersionNumber(); + } + } else if (version.isDeaccessioned() && majorVersion != null + && majorVersion.longValue() == version.getVersionNumber().longValue() + && version.getMinorVersionNumber().equals((long) 0)) { return version.getReleaseTime(); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index abb812d1ba3..71e339a6fca 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -16,165 +16,164 @@ @Named("dfc") @Dependent public class DatasetFieldConstant implements java.io.Serializable { - - public final static String publication = "publication"; - public final static String otherId = "otherId"; - public final static String author = "author"; - public final static String authorFirstName = "authorFirstName"; - public final static String authorLastName = "authorLastName"; - public final static String producer = "producer"; - public final static String software = "software"; - public final static String grantNumber = "grantNumber"; - public final static String distributor = "distributor"; - public final static String datasetContact = "datasetContact"; - public final static String datasetContactEmail = "datasetContactEmail"; - public final static String datasetContactName = "datasetContactName"; - public final static String datasetContactAffiliation = "datasetContactAffiliation"; - public final static String series = "series"; - public final static String datasetVersion = "datasetVersion"; - - public final static String description = "dsDescription"; - public final static String keyword = "keyword"; - public final static String topicClassification = "topicClassification"; - public final static String geographicBoundingBox = "geographicBoundingBox"; - - public final static String note = "note"; - - public final static String publicationRelationType = 
"publicationRelationType"; - public final static String publicationCitation = "publicationCitation"; - public final static String publicationIDType = "publicationIDType"; - public final static String publicationIDNumber = "publicationIDNumber"; - public final static String publicationURL = "publicationURL"; - public final static String publicationReplicationData = "publicationReplicationData"; - - - public final static String title = "title"; - public final static String subTitle="subtitle"; //SEK 6-7-2016 to match what is in DB - public final static String alternativeTitle="alternativeTitle"; //missing from class - public final static String datasetId = "datasetId"; - public final static String authorName ="authorName"; - public final static String authorAffiliation = "authorAffiliation"; - public final static String authorIdType = "authorIdentifierScheme"; - public final static String authorIdValue = "authorIdentifier"; - public final static String otherIdValue="otherIdValue"; - public final static String otherIdAgency= "otherIdAgency"; - - public final static String producerName="producerName"; - public final static String producerURL="producerURL"; - public final static String producerLogo="producerLogoURL"; - public final static String producerAffiliation="producerAffiliation"; - public final static String producerAbbreviation= "producerAbbreviation"; - public final static String productionDate="productionDate"; - public final static String productionPlace="productionPlace"; - public final static String softwareName="softwareName"; - public final static String softwareVersion="softwareVersion"; - public final static String fundingAgency="fundingAgency"; - public final static String grantNumberValue="grantNumberValue"; - public final static String grantNumberAgency="grantNumberAgency"; - public final static String distributorName="distributorName"; - public final static String distributorURL="distributorURL"; - public final static String 
distributorLogo="distributorLogoURL"; - public final static String distributionDate="distributionDate"; - public final static String distributorContactName="distributorContactName"; - public final static String distributorContactAffiliation="distributorContactAffiliation"; - public final static String distributorContactEmail="distributorContactEmail"; - public final static String distributorAffiliation="distributorAffiliation"; - public final static String distributorAbbreviation="distributorAbbreviation"; - - public final static String contributor="contributor"; //SEK added for Dublin Core 6/22 - public final static String contributorType="contributorType"; - public final static String contributorName="contributorName"; - - public final static String depositor="depositor"; - public final static String dateOfDeposit="dateOfDeposit"; - public final static String seriesName="seriesName"; - public final static String seriesInformation="seriesInformation"; - public final static String datasetVersionValue="datasetVersionValue"; - public final static String versionDate="versionDate"; - public final static String keywordValue="keywordValue"; - public final static String keywordTermURI="keywordTermURI"; - public final static String keywordVocab="keywordVocabulary"; - public final static String keywordVocabURI="keywordVocabularyURI"; - public final static String topicClassValue="topicClassValue"; - public final static String topicClassVocab="topicClassVocab"; - public final static String topicClassVocabURI="topicClassVocabURI"; - public final static String descriptionText="dsDescriptionValue"; - public final static String descriptionDate="dsDescriptionDate"; - public final static String timePeriodCovered="timePeriodCovered"; // SEK added 6/13/2016 - public final static String timePeriodCoveredStart="timePeriodCoveredStart"; - public final static String timePeriodCoveredEnd="timePeriodCoveredEnd"; - public final static String dateOfCollection="dateOfCollection"; // SEK added 
6/13/2016 - public final static String dateOfCollectionStart="dateOfCollectionStart"; - public final static String dateOfCollectionEnd="dateOfCollectionEnd"; - public final static String country="country"; - public final static String geographicCoverage="geographicCoverage"; - public final static String otherGeographicCoverage="otherGeographicCoverage"; - public final static String city="city"; // SEK added 6/13/2016 - public final static String state="state"; // SEK added 6/13/2016 - public final static String geographicUnit="geographicUnit"; - public final static String westLongitude="westLongitude"; - public final static String eastLongitude="eastLongitude"; - public final static String northLatitude="northLatitude"; - public final static String southLatitude="southLatitude"; - public final static String unitOfAnalysis="unitOfAnalysis"; - public final static String universe="universe"; - public final static String kindOfData="kindOfData"; - public final static String timeMethod="timeMethod"; - public final static String dataCollector="dataCollector"; - public final static String collectorTraining="collectorTraining"; - public final static String frequencyOfDataCollection="frequencyOfDataCollection"; - public final static String samplingProcedure="samplingProcedure"; - public final static String targetSampleSize = "targetSampleSize"; - public final static String targetSampleActualSize = "targetSampleActualSize"; - public final static String targetSampleSizeFormula = "targetSampleSizeFormula"; - public final static String deviationsFromSampleDesign="deviationsFromSampleDesign"; - public final static String collectionMode="collectionMode"; - public final static String researchInstrument="researchInstrument"; - public final static String dataSources="dataSources"; - public final static String originOfSources="originOfSources"; - public final static String characteristicOfSources="characteristicOfSources"; - public final static String 
accessToSources="accessToSources"; - public final static String dataCollectionSituation="dataCollectionSituation"; - public final static String actionsToMinimizeLoss="actionsToMinimizeLoss"; - public final static String controlOperations="controlOperations"; - public final static String weighting="weighting"; - public final static String cleaningOperations="cleaningOperations"; - public final static String datasetLevelErrorNotes="datasetLevelErrorNotes"; - public final static String responseRate="responseRate"; - public final static String samplingErrorEstimates="samplingErrorEstimates"; - - public final static String socialScienceNotes = "socialScienceNotes"; - public final static String socialScienceNotesType = "socialScienceNotesType"; - public final static String socialScienceNotesSubject = "socialScienceNotesSubject"; - public final static String socialScienceNotesText = "socialScienceNotesText"; - - public final static String otherDataAppraisal="otherDataAppraisal"; - public final static String placeOfAccess="placeOfAccess"; - public final static String originalArchive="originalArchive"; - public final static String availabilityStatus="availabilityStatus"; - public final static String collectionSize="collectionSize"; - public final static String datasetCompletion="datasetCompletion"; - public final static String numberOfFiles="numberOfFiles"; - public final static String confidentialityDeclaration="confidentialityDeclaration"; - public final static String specialPermissions="specialPermissions"; - public final static String restrictions="restrictions"; + + public static final String publication = "publication"; + public static final String otherId = "otherId"; + public static final String author = "author"; + public static final String authorFirstName = "authorFirstName"; + public static final String authorLastName = "authorLastName"; + public static final String producer = "producer"; + public static final String software = "software"; + public static final 
String grantNumber = "grantNumber"; + public static final String distributor = "distributor"; + public static final String datasetContact = "datasetContact"; + public static final String datasetContactEmail = "datasetContactEmail"; + public static final String datasetContactName = "datasetContactName"; + public static final String datasetContactAffiliation = "datasetContactAffiliation"; + public static final String series = "series"; + public static final String datasetVersion = "datasetVersion"; + + public static final String description = "dsDescription"; + public static final String keyword = "keyword"; + public static final String topicClassification = "topicClassification"; + public static final String geographicBoundingBox = "geographicBoundingBox"; + + public static final String note = "note"; + + public static final String publicationRelationType = "publicationRelationType"; + public static final String publicationCitation = "publicationCitation"; + public static final String publicationIDType = "publicationIDType"; + public static final String publicationIDNumber = "publicationIDNumber"; + public static final String publicationURL = "publicationURL"; + public static final String publicationReplicationData = "publicationReplicationData"; + + public static final String title = "title"; + public static final String subTitle="subtitle"; //SEK 6-7-2016 to match what is in DB + public static final String alternativeTitle="alternativeTitle"; //missing from class + public static final String datasetId = "datasetId"; + public static final String authorName ="authorName"; + public static final String authorAffiliation = "authorAffiliation"; + public static final String authorIdType = "authorIdentifierScheme"; + public static final String authorIdValue = "authorIdentifier"; + public static final String otherIdValue="otherIdValue"; + public static final String otherIdAgency= "otherIdAgency"; + + public static final String producerName="producerName"; + public static 
final String producerURL="producerURL"; + public static final String producerLogo="producerLogoURL"; + public static final String producerAffiliation="producerAffiliation"; + public static final String producerAbbreviation= "producerAbbreviation"; + public static final String productionDate="productionDate"; + public static final String productionPlace="productionPlace"; + public static final String softwareName="softwareName"; + public static final String softwareVersion="softwareVersion"; + public static final String fundingAgency="fundingAgency"; + public static final String grantNumberValue="grantNumberValue"; + public static final String grantNumberAgency="grantNumberAgency"; + public static final String distributorName="distributorName"; + public static final String distributorURL="distributorURL"; + public static final String distributorLogo="distributorLogoURL"; + public static final String distributionDate="distributionDate"; + public static final String distributorContactName="distributorContactName"; + public static final String distributorContactAffiliation="distributorContactAffiliation"; + public static final String distributorContactEmail="distributorContactEmail"; + public static final String distributorAffiliation="distributorAffiliation"; + public static final String distributorAbbreviation="distributorAbbreviation"; + + public static final String contributor="contributor"; //SEK added for Dublin Core 6/22 + public static final String contributorType="contributorType"; + public static final String contributorName="contributorName"; + + public static final String depositor="depositor"; + public static final String dateOfDeposit="dateOfDeposit"; + public static final String seriesName="seriesName"; + public static final String seriesInformation="seriesInformation"; + public static final String datasetVersionValue="datasetVersionValue"; + public static final String versionDate="versionDate"; + public static final String keywordValue="keywordValue"; + 
public static final String keywordTermURI="keywordTermURI"; + public static final String keywordVocab="keywordVocabulary"; + public static final String keywordVocabURI="keywordVocabularyURI"; + public static final String topicClassValue="topicClassValue"; + public static final String topicClassVocab="topicClassVocab"; + public static final String topicClassVocabURI="topicClassVocabURI"; + public static final String descriptionText="dsDescriptionValue"; + public static final String descriptionDate="dsDescriptionDate"; + public static final String timePeriodCovered="timePeriodCovered"; // SEK added 6/13/2016 + public static final String timePeriodCoveredStart="timePeriodCoveredStart"; + public static final String timePeriodCoveredEnd="timePeriodCoveredEnd"; + public static final String dateOfCollection="dateOfCollection"; // SEK added 6/13/2016 + public static final String dateOfCollectionStart="dateOfCollectionStart"; + public static final String dateOfCollectionEnd="dateOfCollectionEnd"; + public static final String country="country"; + public static final String geographicCoverage="geographicCoverage"; + public static final String otherGeographicCoverage="otherGeographicCoverage"; + public static final String city="city"; // SEK added 6/13/2016 + public static final String state="state"; // SEK added 6/13/2016 + public static final String geographicUnit="geographicUnit"; + public static final String westLongitude="westLongitude"; + public static final String eastLongitude="eastLongitude"; + public static final String northLatitude="northLatitude"; + public static final String southLatitude="southLatitude"; + public static final String unitOfAnalysis="unitOfAnalysis"; + public static final String universe="universe"; + public static final String kindOfData="kindOfData"; + public static final String timeMethod="timeMethod"; + public static final String dataCollector="dataCollector"; + public static final String collectorTraining="collectorTraining"; + public static 
final String frequencyOfDataCollection="frequencyOfDataCollection"; + public static final String samplingProcedure="samplingProcedure"; + public static final String targetSampleSize = "targetSampleSize"; + public static final String targetSampleActualSize = "targetSampleActualSize"; + public static final String targetSampleSizeFormula = "targetSampleSizeFormula"; + public static final String deviationsFromSampleDesign="deviationsFromSampleDesign"; + public static final String collectionMode="collectionMode"; + public static final String researchInstrument="researchInstrument"; + public static final String dataSources="dataSources"; + public static final String originOfSources="originOfSources"; + public static final String characteristicOfSources="characteristicOfSources"; + public static final String accessToSources="accessToSources"; + public static final String dataCollectionSituation="dataCollectionSituation"; + public static final String actionsToMinimizeLoss="actionsToMinimizeLoss"; + public static final String controlOperations="controlOperations"; + public static final String weighting="weighting"; + public static final String cleaningOperations="cleaningOperations"; + public static final String datasetLevelErrorNotes="datasetLevelErrorNotes"; + public static final String responseRate="responseRate"; + public static final String samplingErrorEstimates="samplingErrorEstimates"; + + public static final String socialScienceNotes = "socialScienceNotes"; + public static final String socialScienceNotesType = "socialScienceNotesType"; + public static final String socialScienceNotesSubject = "socialScienceNotesSubject"; + public static final String socialScienceNotesText = "socialScienceNotesText"; + + public static final String otherDataAppraisal="otherDataAppraisal"; + public static final String placeOfAccess="placeOfAccess"; + public static final String originalArchive="originalArchive"; + public static final String availabilityStatus="availabilityStatus"; + 
public static final String collectionSize="collectionSize"; + public static final String datasetCompletion="datasetCompletion"; + public static final String numberOfFiles="numberOfFiles"; + public static final String confidentialityDeclaration="confidentialityDeclaration"; + public static final String specialPermissions="specialPermissions"; + public static final String restrictions="restrictions"; @Deprecated //Doesn't appear to be used and is not datasetContact - public final static String contact="contact"; - public final static String citationRequirements="citationRequirements"; - public final static String depositorRequirements="depositorRequirements"; - public final static String conditions="conditions"; - public final static String disclaimer="disclaimer"; - public final static String relatedMaterial="relatedMaterial"; - //public final static String replicationFor="replicationFor"; - //public final static String relatedPublications="relatedPublications"; - public final static String relatedDatasets="relatedDatasets"; - public final static String otherReferences="otherReferences"; - public final static String notesText="notesText"; - public final static String language="language"; - public final static String noteInformationType="noteInformationType"; - public final static String notesInformationSubject="notesInformationSubject"; - public final static String subject="subject"; + public static final String contact="contact"; + public static final String citationRequirements="citationRequirements"; + public static final String depositorRequirements="depositorRequirements"; + public static final String conditions="conditions"; + public static final String disclaimer="disclaimer"; + public static final String relatedMaterial="relatedMaterial"; + //public static final String replicationFor="replicationFor"; + //public static final String relatedPublications="relatedPublications"; + public static final String relatedDatasets="relatedDatasets"; + public static final 
String otherReferences="otherReferences"; + public static final String notesText="notesText"; + public static final String language="language"; + public static final String noteInformationType="noteInformationType"; + public static final String notesInformationSubject="notesInformationSubject"; + public static final String subject="subject"; /* * The following getters are needed so we can use them as properties in JSP */ diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java index ff78b0c83ec..ded7c83de62 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldServiceBean.java @@ -424,7 +424,7 @@ public Set getIndexableStringsByTermUri(String termUri, JsonObject cvocE for (int i = 0; i < jarr.size(); i++) { if (jarr.get(i).getValueType().equals(JsonValue.ValueType.STRING)) { strings.add(jarr.getString(i)); - } else if (jarr.get(i).getValueType().equals(ValueType.OBJECT)) { // This condition handles SKOMOS format like [{"lang": "en","value": "non-apis bee"},{"lang": "fr","value": "abeille non apis"}] + } else if (jarr.get(i).getValueType().equals(ValueType.OBJECT)) { // This condition handles SKOSMOS format like [{"lang": "en","value": "non-apis bee"},{"lang": "fr","value": "abeille non apis"}] JsonObject entry = jarr.getJsonObject(i); if (entry.containsKey("value")) { logger.fine("adding " + entry.getString("value") + " for " + termUri); @@ -891,6 +891,10 @@ public List findAllDisplayedOnCreateInMetadataBlock(MetadataBl } public List findAllInMetadataBlockAndDataverse(MetadataBlock metadataBlock, Dataverse dataverse, boolean onlyDisplayedOnCreate) { + if (!dataverse.isMetadataBlockRoot() && dataverse.getOwner() != null) { + return findAllInMetadataBlockAndDataverse(metadataBlock, dataverse.getOwner(), onlyDisplayedOnCreate); + } + CriteriaBuilder criteriaBuilder = em.getCriteriaBuilder(); 
CriteriaQuery criteriaQuery = criteriaBuilder.createQuery(DatasetFieldType.class); diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 0433c425fd2..ac5923b95bf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1390,7 +1390,10 @@ public List getRelatedPublications() { relatedPublication.setIdNumber(subField.getDisplayValue()); break; case DatasetFieldConstant.publicationRelationType: - relatedPublication.setRelationType(subField.getDisplayValue()); + List values = subField.getValues_nondisplay(); + if (!values.isEmpty()) { + relatedPublication.setRelationType(values.get(0)); //only one value allowed + } break; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java index eca0c84ae84..c32f49e985e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionDifference.java @@ -152,8 +152,7 @@ public DatasetVersionDifference(DatasetVersion newVersion, DatasetVersion origin getReplacedFiles(); initDatasetFilesDifferencesList(); - //Sort within blocks by datasetfieldtype dispaly order then.... - //sort via metadatablock order - citation first... 
+ //Sort within blocks by datasetfieldtype display order for (List blockList : detailDataByBlock) { Collections.sort(blockList, (DatasetField[] l1, DatasetField[] l2) -> { DatasetField dsfa = l1[0]; //(DatasetField[]) l1.get(0); @@ -163,6 +162,17 @@ public DatasetVersionDifference(DatasetVersion newVersion, DatasetVersion origin return Integer.valueOf(a).compareTo(b); }); } + //Sort existing compoundValues by datasetfieldtype display order + for (List blockList : detailDataByBlock) { + for (DatasetField[] dfarr : blockList) { + for (DatasetField df : dfarr) { + for (DatasetFieldCompoundValue dfcv : df.getDatasetFieldCompoundValues()) { + Collections.sort(dfcv.getChildDatasetFields(), DatasetField.DisplayOrder); + } + } + } + } + //Sort via metadatablock order Collections.sort(detailDataByBlock, (List l1, List l2) -> { DatasetField dsfa[] = (DatasetField[]) l1.get(0); DatasetField dsfb[] = (DatasetField[]) l2.get(0); diff --git a/src/main/java/edu/harvard/iq/dataverse/DvObject.java b/src/main/java/edu/harvard/iq/dataverse/DvObject.java index a4882f772d6..5dab43fbdbd 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DvObject.java +++ b/src/main/java/edu/harvard/iq/dataverse/DvObject.java @@ -27,9 +27,9 @@ @NamedQuery(name = "DvObject.ownedObjectsById", query="SELECT COUNT(obj) FROM DvObject obj WHERE obj.owner.id=:id"), @NamedQuery(name = "DvObject.findByGlobalId", - query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"), + query = "SELECT o FROM DvObject o WHERE UPPER(o.identifier)=UPPER(:identifier) and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"), @NamedQuery(name = "DvObject.findIdByGlobalId", - query = "SELECT o.id FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol and o.dtype=:dtype"), + query = "SELECT o.id FROM DvObject o WHERE UPPER(o.identifier)=UPPER(:identifier) and o.authority=:authority and 
o.protocol=:protocol and o.dtype=:dtype"), @NamedQuery(name = "DvObject.findByAlternativeGlobalId", query = "SELECT o FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"), @@ -37,7 +37,7 @@ query = "SELECT o.id FROM DvObject o, AlternativePersistentIdentifier a WHERE o.id = a.dvObject.id and a.identifier=:identifier and a.authority=:authority and a.protocol=:protocol and o.dtype=:dtype"), @NamedQuery(name = "DvObject.findByProtocolIdentifierAuthority", - query = "SELECT o FROM DvObject o WHERE o.identifier=:identifier and o.authority=:authority and o.protocol=:protocol"), + query = "SELECT o FROM DvObject o WHERE UPPER(o.identifier)=UPPER(:identifier) and o.authority=:authority and o.protocol=:protocol"), @NamedQuery(name = "DvObject.findByOwnerId", query = "SELECT o FROM DvObject o WHERE o.owner.id=:ownerId order by o.dtype desc, o.id"), @NamedQuery(name = "DvObject.findByAuthenticatedUserId", @@ -53,7 +53,8 @@ @Table(indexes = {@Index(columnList="dtype") , @Index(columnList="owner_id") , @Index(columnList="creator_id") - , @Index(columnList="releaseuser_id")}, + , @Index(columnList="releaseuser_id") + , @Index(columnList="authority,protocol, UPPER(identifier)", name="INDEX_DVOBJECT_authority_protocol_upper_identifier")}, uniqueConstraints = {@UniqueConstraint(columnNames = {"authority,protocol,identifier"}),@UniqueConstraint(columnNames = {"owner_id,storageidentifier"})}) public abstract class DvObject extends DataverseEntity implements java.io.Serializable { diff --git a/src/main/java/edu/harvard/iq/dataverse/Shib.java b/src/main/java/edu/harvard/iq/dataverse/Shib.java index a3dfbf81512..121d03ef0c7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Shib.java +++ b/src/main/java/edu/harvard/iq/dataverse/Shib.java @@ -421,6 +421,9 @@ private String getValueFromAssertion(String key) { Object attribute = request.getAttribute(key); if 
(attribute != null) { String attributeValue = attribute.toString(); + if(systemConfig.isShibAttributeCharacterSetConversionEnabled()) { + attributeValue = new String(attributeValue.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8); + } String trimmedValue = attributeValue.trim(); if (!trimmedValue.isEmpty()) { logger.fine("The SAML assertion for \"" + key + "\" (optional) was \"" + attributeValue + "\" and was trimmed to \"" + trimmedValue + "\"."); diff --git a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java index 542cf39cfbe..46736da73d4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ThumbnailServiceWrapper.java @@ -5,6 +5,7 @@ */ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.dataaccess.DataAccess; import edu.harvard.iq.dataverse.dataaccess.ImageThumbConverter; import edu.harvard.iq.dataverse.dataaccess.StorageIO; @@ -20,7 +21,6 @@ import jakarta.ejb.EJB; import jakarta.enterprise.context.RequestScoped; -import jakarta.inject.Inject; import jakarta.inject.Named; /** @@ -33,9 +33,8 @@ public class ThumbnailServiceWrapper implements java.io.Serializable { private static final Logger logger = Logger.getLogger(ThumbnailServiceWrapper.class.getCanonicalName()); - - @Inject - PermissionsWrapper permissionsWrapper; + @EJB + PermissionServiceBean permissionService; @EJB DataverseServiceBean dataverseService; @EJB @@ -49,12 +48,15 @@ public class ThumbnailServiceWrapper implements java.io.Serializable { private Map dvobjectViewMap = new HashMap<>(); private Map hasThumbMap = new HashMap<>(); + private boolean hasDownloadFilePermission(DvObject dvo) { + return permissionService.on(dvo).has(Permission.DownloadFile) ; + } public String getFileCardImageAsUrl(SolrSearchResult result) { DataFile dataFile = result != null 
&& result.getEntity() != null ? ((DataFile) result.getEntity()) : null; - if (dataFile == null || result.isHarvested() + if (dataFile == null + || result.isHarvested() || !isThumbnailAvailable(dataFile) - || dataFile.isRestricted() - || !dataFile.isReleased() + || (dataFile.isRestricted() && !hasDownloadFilePermission(dataFile)) || FileUtil.isActivelyEmbargoed(dataFile) || FileUtil.isRetentionExpired(dataFile)) { return null; @@ -105,7 +107,7 @@ public String getFileCardImageAsBase64Url(SolrSearchResult result) { } if ((!((DataFile)result.getEntity()).isRestricted() - || permissionsWrapper.hasDownloadFilePermission(result.getEntity())) + || hasDownloadFilePermission(result.getEntity())) && isThumbnailAvailable((DataFile) result.getEntity())) { cardImageUrl = ImageThumbConverter.getImageThumbnailAsBase64( diff --git a/src/main/java/edu/harvard/iq/dataverse/WidgetWrapper.java b/src/main/java/edu/harvard/iq/dataverse/WidgetWrapper.java index a8ea5fabde4..c51903e2ed4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/WidgetWrapper.java +++ b/src/main/java/edu/harvard/iq/dataverse/WidgetWrapper.java @@ -18,8 +18,8 @@ @Named public class WidgetWrapper implements java.io.Serializable { - private final static String WIDGET_PARAMETER = "widget"; - private final static char WIDGET_SEPARATOR = '@'; + private static final String WIDGET_PARAMETER = "widget"; + private static final char WIDGET_SEPARATOR = '@'; private Boolean widgetView; private String widgetHome; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index c7657768d16..369a22fe8d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -2077,10 +2077,16 @@ public Response getLinks(@Context ContainerRequestContext crc, @PathParam("id") List dvsThatLinkToThisDatasetId = dataverseSvc.findDataversesThatLinkToThisDatasetId(datasetId); JsonArrayBuilder 
dataversesThatLinkToThisDatasetIdBuilder = Json.createArrayBuilder(); for (Dataverse dataverse : dvsThatLinkToThisDatasetId) { - dataversesThatLinkToThisDatasetIdBuilder.add(dataverse.getAlias() + " (id " + dataverse.getId() + ")"); + JsonObjectBuilder datasetBuilder = Json.createObjectBuilder(); + datasetBuilder.add("id", dataverse.getId()); + datasetBuilder.add("alias", dataverse.getAlias()); + datasetBuilder.add("displayName", dataverse.getDisplayName()); + dataversesThatLinkToThisDatasetIdBuilder.add(datasetBuilder.build()); } JsonObjectBuilder response = Json.createObjectBuilder(); - response.add("dataverses that link to dataset id " + datasetId, dataversesThatLinkToThisDatasetIdBuilder); + response.add("id", datasetId); + response.add("identifier", dataset.getIdentifier()); + response.add("linked-dataverses", dataversesThatLinkToThisDatasetIdBuilder); return ok(response); } catch (WrappedResponse wr) { return wr.getResponse(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index 17e3086f184..f05bba8830e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -3,12 +3,9 @@ import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.api.auth.AuthRequired; import edu.harvard.iq.dataverse.api.datadeposit.SwordServiceBean; -import edu.harvard.iq.dataverse.api.dto.DataverseMetadataBlockFacetDTO; +import edu.harvard.iq.dataverse.api.dto.*; import edu.harvard.iq.dataverse.authorization.DataverseRole; -import edu.harvard.iq.dataverse.api.dto.ExplicitGroupDTO; -import edu.harvard.iq.dataverse.api.dto.RoleAssignmentDTO; -import edu.harvard.iq.dataverse.api.dto.RoleDTO; import edu.harvard.iq.dataverse.api.imports.ImportException; import edu.harvard.iq.dataverse.api.imports.ImportServiceBean; import edu.harvard.iq.dataverse.authorization.Permission; @@ -127,75 +124,158 @@ public Response 
addRoot(@Context ContainerRequestContext crc, String body) { @Path("{identifier}") public Response addDataverse(@Context ContainerRequestContext crc, String body, @PathParam("identifier") String parentIdtf) { Dataverse newDataverse; - JsonObject newDataverseJson; try { - newDataverseJson = JsonUtil.getJsonObject(body); - newDataverse = jsonParser().parseDataverse(newDataverseJson); + newDataverse = parseAndValidateAddDataverseRequestBody(body); } catch (JsonParsingException jpe) { - logger.log(Level.SEVERE, "Json: {0}", body); return error(Status.BAD_REQUEST, MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.create.error.jsonparse"), jpe.getMessage())); } catch (JsonParseException ex) { - logger.log(Level.SEVERE, "Error parsing dataverse from json: " + ex.getMessage(), ex); return error(Status.BAD_REQUEST, MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.create.error.jsonparsetodataverse"), ex.getMessage())); } try { - JsonObject metadataBlocksJson = newDataverseJson.getJsonObject("metadataBlocks"); - List inputLevels = null; - List metadataBlocks = null; - List facetList = null; - if (metadataBlocksJson != null) { - JsonArray inputLevelsArray = metadataBlocksJson.getJsonArray("inputLevels"); - inputLevels = inputLevelsArray != null ? parseInputLevels(inputLevelsArray, newDataverse) : null; - - JsonArray metadataBlockNamesArray = metadataBlocksJson.getJsonArray("metadataBlockNames"); - metadataBlocks = metadataBlockNamesArray != null ? parseNewDataverseMetadataBlocks(metadataBlockNamesArray) : null; - - JsonArray facetIdsArray = metadataBlocksJson.getJsonArray("facetIds"); - facetList = facetIdsArray != null ? 
parseFacets(facetIdsArray) : null; - } + List inputLevels = parseInputLevels(body, newDataverse); + List metadataBlocks = parseMetadataBlocks(body); + List facets = parseFacets(body); if (!parentIdtf.isEmpty()) { Dataverse owner = findDataverseOrDie(parentIdtf); newDataverse.setOwner(owner); } - // set the dataverse - contact relationship in the contacts - for (DataverseContact dc : newDataverse.getDataverseContacts()) { - dc.setDataverse(newDataverse); - } - AuthenticatedUser u = getRequestAuthenticatedUserOrDie(crc); - newDataverse = execCommand(new CreateDataverseCommand(newDataverse, createDataverseRequest(u), facetList, inputLevels, metadataBlocks)); + newDataverse = execCommand(new CreateDataverseCommand(newDataverse, createDataverseRequest(u), facets, inputLevels, metadataBlocks)); return created("/dataverses/" + newDataverse.getAlias(), json(newDataverse)); - } catch (WrappedResponse ww) { - - String error = ConstraintViolationUtil.getErrorStringForConstraintViolations(ww.getCause()); - if (!error.isEmpty()) { - logger.log(Level.INFO, error); - return ww.refineResponse(error); - } - return ww.getResponse(); + } catch (WrappedResponse ww) { + return handleWrappedResponse(ww); } catch (EJBException ex) { - Throwable cause = ex; - StringBuilder sb = new StringBuilder(); - sb.append("Error creating dataverse."); - while (cause.getCause() != null) { - cause = cause.getCause(); - if (cause instanceof ConstraintViolationException) { - sb.append(ConstraintViolationUtil.getErrorStringForConstraintViolations(cause)); - } - } - logger.log(Level.SEVERE, sb.toString()); - return error(Response.Status.INTERNAL_SERVER_ERROR, "Error creating dataverse: " + sb.toString()); + return handleEJBException(ex, "Error creating dataverse."); } catch (Exception ex) { logger.log(Level.SEVERE, "Error creating dataverse", ex); return error(Response.Status.INTERNAL_SERVER_ERROR, "Error creating dataverse: " + ex.getMessage()); + } + } + private Dataverse 
parseAndValidateAddDataverseRequestBody(String body) throws JsonParsingException, JsonParseException { + try { + JsonObject addDataverseJson = JsonUtil.getJsonObject(body); + return jsonParser().parseDataverse(addDataverseJson); + } catch (JsonParsingException jpe) { + logger.log(Level.SEVERE, "Json: {0}", body); + throw jpe; + } catch (JsonParseException ex) { + logger.log(Level.SEVERE, "Error parsing dataverse from json: " + ex.getMessage(), ex); + throw ex; } } + @PUT + @AuthRequired + @Path("{identifier}") + public Response updateDataverse(@Context ContainerRequestContext crc, String body, @PathParam("identifier") String identifier) { + Dataverse dataverse; + try { + dataverse = findDataverseOrDie(identifier); + } catch (WrappedResponse e) { + return e.getResponse(); + } + + DataverseDTO updatedDataverseDTO; + try { + updatedDataverseDTO = parseAndValidateUpdateDataverseRequestBody(body); + } catch (JsonParsingException jpe) { + return error(Status.BAD_REQUEST, MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.create.error.jsonparse"), jpe.getMessage())); + } catch (JsonParseException ex) { + return error(Status.BAD_REQUEST, MessageFormat.format(BundleUtil.getStringFromBundle("dataverse.create.error.jsonparsetodataverse"), ex.getMessage())); + } + + try { + List inputLevels = parseInputLevels(body, dataverse); + List metadataBlocks = parseMetadataBlocks(body); + List facets = parseFacets(body); + + AuthenticatedUser u = getRequestAuthenticatedUserOrDie(crc); + dataverse = execCommand(new UpdateDataverseCommand(dataverse, facets, null, createDataverseRequest(u), inputLevels, metadataBlocks, updatedDataverseDTO)); + return ok(json(dataverse)); + + } catch (WrappedResponse ww) { + return handleWrappedResponse(ww); + } catch (EJBException ex) { + return handleEJBException(ex, "Error updating dataverse."); + } catch (Exception ex) { + logger.log(Level.SEVERE, "Error updating dataverse", ex); + return error(Response.Status.INTERNAL_SERVER_ERROR, "Error 
updating dataverse: " + ex.getMessage()); + } + } + + private DataverseDTO parseAndValidateUpdateDataverseRequestBody(String body) throws JsonParsingException, JsonParseException { + try { + JsonObject updateDataverseJson = JsonUtil.getJsonObject(body); + return jsonParser().parseDataverseDTO(updateDataverseJson); + } catch (JsonParsingException jpe) { + logger.log(Level.SEVERE, "Json: {0}", body); + throw jpe; + } catch (JsonParseException ex) { + logger.log(Level.SEVERE, "Error parsing DataverseDTO from json: " + ex.getMessage(), ex); + throw ex; + } + } + + private List parseInputLevels(String body, Dataverse dataverse) throws WrappedResponse { + JsonObject metadataBlocksJson = getMetadataBlocksJson(body); + if (metadataBlocksJson == null) { + return null; + } + JsonArray inputLevelsArray = metadataBlocksJson.getJsonArray("inputLevels"); + return inputLevelsArray != null ? parseInputLevels(inputLevelsArray, dataverse) : null; + } + + private List parseMetadataBlocks(String body) throws WrappedResponse { + JsonObject metadataBlocksJson = getMetadataBlocksJson(body); + if (metadataBlocksJson == null) { + return null; + } + JsonArray metadataBlocksArray = metadataBlocksJson.getJsonArray("metadataBlockNames"); + return metadataBlocksArray != null ? parseNewDataverseMetadataBlocks(metadataBlocksArray) : null; + } + + private List parseFacets(String body) throws WrappedResponse { + JsonObject metadataBlocksJson = getMetadataBlocksJson(body); + if (metadataBlocksJson == null) { + return null; + } + JsonArray facetsArray = metadataBlocksJson.getJsonArray("facetIds"); + return facetsArray != null ? 
parseFacets(facetsArray) : null; + } + + private JsonObject getMetadataBlocksJson(String body) { + JsonObject dataverseJson = JsonUtil.getJsonObject(body); + return dataverseJson.getJsonObject("metadataBlocks"); + } + + private Response handleWrappedResponse(WrappedResponse ww) { + String error = ConstraintViolationUtil.getErrorStringForConstraintViolations(ww.getCause()); + if (!error.isEmpty()) { + logger.log(Level.INFO, error); + return ww.refineResponse(error); + } + return ww.getResponse(); + } + + private Response handleEJBException(EJBException ex, String action) { + Throwable cause = ex; + StringBuilder sb = new StringBuilder(); + sb.append(action); + while (cause.getCause() != null) { + cause = cause.getCause(); + if (cause instanceof ConstraintViolationException) { + sb.append(ConstraintViolationUtil.getErrorStringForConstraintViolations(cause)); + } + } + logger.log(Level.SEVERE, sb.toString()); + return error(Response.Status.INTERNAL_SERVER_ERROR, sb.toString()); + } + private List parseNewDataverseMetadataBlocks(JsonArray metadataBlockNamesArray) throws WrappedResponse { List selectedMetadataBlocks = new ArrayList<>(); for (JsonString metadataBlockName : metadataBlockNamesArray.getValuesAs(JsonString.class)) { @@ -407,6 +487,12 @@ public Response importDataset(@Context ContainerRequestContext crc, String jsonB if (ds.getIdentifier() == null) { return badRequest("Please provide a persistent identifier, either by including it in the JSON, or by using the pid query parameter."); } + + PidProvider pidProvider = PidUtil.getPidProvider(ds.getGlobalId().getProviderId()); + if (pidProvider == null || !pidProvider.canManagePID()) { + return badRequest("Cannot import a dataset that has a PID that doesn't match the server's settings"); + } + boolean shouldRelease = StringUtil.isTrue(releaseParam); DataverseRequest request = createDataverseRequest(u); @@ -615,62 +701,22 @@ public Response deleteDataverse(@Context ContainerRequestContext crc, @PathParam public 
Response updateAttribute(@Context ContainerRequestContext crc, @PathParam("identifier") String identifier, @PathParam("attribute") String attribute, @QueryParam("value") String value) { try { - Dataverse collection = findDataverseOrDie(identifier); - User user = getRequestUser(crc); - DataverseRequest dvRequest = createDataverseRequest(user); - - // TODO: The cases below use hard coded strings, because we have no place for definitions of those! - // They are taken from util.json.JsonParser / util.json.JsonPrinter. This shall be changed. - // This also should be extended to more attributes, like the type, theme, contacts, some booleans, etc. - switch (attribute) { - case "alias": - collection.setAlias(value); - break; - case "name": - collection.setName(value); - break; - case "description": - collection.setDescription(value); - break; - case "affiliation": - collection.setAffiliation(value); - break; - /* commenting out the code from the draft pr #9462: - case "versionPidsConduct": - CollectionConduct conduct = CollectionConduct.findBy(value); - if (conduct == null) { - return badRequest("'" + value + "' is not one of [" + - String.join(",", CollectionConduct.asList()) + "]"); - } - collection.setDatasetVersionPidConduct(conduct); - break; - */ - case "filePIDsEnabled": - if(!user.isSuperuser()) { - return forbidden("You must be a superuser to change this setting"); - } - if(!settingsService.isTrueForKey(SettingsServiceBean.Key.AllowEnablingFilePIDsPerCollection, false)) { - return forbidden("Changing File PID policy per collection is not enabled on this server"); - } - collection.setFilePIDsEnabled(parseBooleanOrDie(value)); - break; - default: - return badRequest("'" + attribute + "' is not a supported attribute"); - } - - // Off to persistence layer - execCommand(new UpdateDataverseCommand(collection, null, null, dvRequest, null)); - - // Also return modified collection to user - return ok("Update successful", JsonPrinter.json(collection)); - - // TODO: This is 
an anti-pattern, necessary due to this bean being an EJB, causing very noisy and unnecessary - // logging by the EJB container for bubbling exceptions. (It would be handled by the error handlers.) + Dataverse dataverse = findDataverseOrDie(identifier); + Object formattedValue = formatAttributeValue(attribute, value); + dataverse = execCommand(new UpdateDataverseAttributeCommand(createDataverseRequest(getRequestUser(crc)), dataverse, attribute, formattedValue)); + return ok("Update successful", JsonPrinter.json(dataverse)); } catch (WrappedResponse e) { return e.getResponse(); } } + private Object formatAttributeValue(String attribute, String value) throws WrappedResponse { + if (attribute.equals("filePIDsEnabled")) { + return parseBooleanOrDie(value); + } + return value; + } + @GET @AuthRequired @Path("{identifier}/inputLevels") diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Files.java b/src/main/java/edu/harvard/iq/dataverse/api/Files.java index d786aab35a8..633d420c527 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Files.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Files.java @@ -343,10 +343,10 @@ public Response deleteFileInDataset(@Context ContainerRequestContext crc, @PathP DataFile dataFile = findDataFileOrDie(fileIdOrPersistentId); FileMetadata fileToDelete = dataFile.getLatestFileMetadata(); Dataset dataset = dataFile.getOwner(); - DatasetVersion v = dataset.getOrCreateEditVersion(); + dataset.getOrCreateEditVersion(); deletePhysicalFile = !dataFile.isReleased(); - UpdateDatasetVersionCommand update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, Arrays.asList(fileToDelete), v); + UpdateDatasetVersionCommand update_cmd = new UpdateDatasetVersionCommand(dataset, dvRequest, Arrays.asList(fileToDelete)); update_cmd.setValidateLenient(true); try { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java index 1f2f1039327..306b863c9e4 
100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java @@ -19,6 +19,9 @@ import java.io.IOException; import java.io.InputStream; import java.net.HttpURLConnection; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.util.Iterator; import java.util.List; @@ -152,10 +155,17 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE // DataCite wants "doi=", not "doi:". String authorityPlusIdentifier = persistentId.replaceFirst("doi:", ""); // Request max page size and then loop to handle multiple pages - URL url = new URL(JvmSettings.DATACITE_REST_API_URL.lookup() + + URL url = null; + try { + url = new URI(JvmSettings.DATACITE_REST_API_URL.lookup(pidProvider.getId()) + "/events?doi=" + authorityPlusIdentifier + - "&source=crossref&page[size]=1000"); + "&source=crossref&page[size]=1000").toURL(); + } catch (URISyntaxException e) { + //Nominally this means a config error/ bad DATACITE_REST_API_URL for this provider + logger.warning("Unable to create URL for " + persistentId + ", pidProvider " + pidProvider.getId()); + return error(Status.INTERNAL_SERVER_ERROR, "Unable to create DataCite URL to retrieve citations."); + } logger.fine("Retrieving Citations from " + url.toString()); boolean nextPage = true; JsonArrayBuilder dataBuilder = Json.createArrayBuilder(); @@ -178,7 +188,12 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE dataBuilder.add(iter.next()); } if (links.containsKey("next")) { - url = new URL(links.getString("next")); + try { + url = new URI(links.getString("next")).toURL(); + } catch (URISyntaxException e) { + logger.warning("Unable to create URL from DataCite response: " + links.getString("next")); + return error(Status.INTERNAL_SERVER_ERROR, "Unable to retrieve all results from DataCite"); + } } else { nextPage = false; } 
@@ -187,7 +202,7 @@ public Response updateCitationsForDataset(@PathParam("id") String id) throws IOE JsonArray allData = dataBuilder.build(); List datasetExternalCitations = datasetExternalCitationsService.parseCitations(allData); /* - * ToDo: If this is the only source of citations, we should remove all the existing ones for the dataset and repopuate them. + * ToDo: If this is the only source of citations, we should remove all the existing ones for the dataset and repopulate them. * As is, this call doesn't remove old citations if there are now none (legacy issue if we decide to stop counting certain types of citation * as we've done for 'hasPart'). * If there are some, this call individually checks each one and if a matching item exists, it removes it and adds it back. Faster and better to delete all and diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java b/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java index 452e5df9f9a..f36c514859e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Metrics.java @@ -206,12 +206,13 @@ public Response getDatasetsTimeSeriest(@Context Request req, @Context UriInfo ur return error(BAD_REQUEST, ia.getLocalizedMessage()); } String metricName = "datasets"; - JsonArray jsonArray = MetricsUtil.stringToJsonArray(metricsSvc.returnUnexpiredCacheAllTime(metricName, null, d)); + String validDataLocation = MetricsUtil.validateDataLocationStringType(dataLocation); + JsonArray jsonArray = MetricsUtil.stringToJsonArray(metricsSvc.returnUnexpiredCacheAllTime(metricName, validDataLocation, d)); if (null == jsonArray) { // run query and save - jsonArray = metricsSvc.getDatasetsTimeSeries(uriInfo, dataLocation, d); - metricsSvc.save(new Metric(metricName, null, null, d, jsonArray.toString())); + jsonArray = metricsSvc.getDatasetsTimeSeries(uriInfo, validDataLocation, d); + metricsSvc.save(new Metric(metricName, null, validDataLocation, d, 
jsonArray.toString())); } MediaType requestedType = getVariant(req, MediaType.valueOf(FileUtil.MIME_TYPE_CSV), MediaType.APPLICATION_JSON_TYPE); if ((requestedType != null) && (requestedType.equals(MediaType.APPLICATION_JSON_TYPE))) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Users.java b/src/main/java/edu/harvard/iq/dataverse/api/Users.java index 1f5430340c2..ecf7839e616 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Users.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Users.java @@ -24,13 +24,7 @@ import jakarta.ejb.Stateless; import jakarta.json.JsonArray; import jakarta.json.JsonObjectBuilder; -import jakarta.ws.rs.BadRequestException; -import jakarta.ws.rs.DELETE; -import jakarta.ws.rs.GET; -import jakarta.ws.rs.POST; -import jakarta.ws.rs.Path; -import jakarta.ws.rs.PathParam; -import jakarta.ws.rs.Produces; +import jakarta.ws.rs.*; import jakarta.ws.rs.container.ContainerRequestContext; import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.MediaType; @@ -143,21 +137,26 @@ public Response deleteToken(@Context ContainerRequestContext crc) { @Path("token") @AuthRequired @GET - public Response getTokenExpirationDate() { - ApiToken token = authSvc.findApiToken(getRequestApiKey()); - - if (token == null) { - return notFound("Token " + getRequestApiKey() + " not found."); + public Response getTokenExpirationDate(@Context ContainerRequestContext crc) { + try { + AuthenticatedUser user = getRequestAuthenticatedUserOrDie(crc); + ApiToken token = authSvc.findApiTokenByUser(user); + + if (token == null) { + return notFound("Token not found."); + } + + return ok(String.format("Token %s expires on %s", token.getTokenString(), token.getExpireTime())); + + } catch (WrappedResponse wr) { + return wr.getResponse(); } - - return ok("Token " + getRequestApiKey() + " expires on " + token.getExpireTime()); - } @Path("token/recreate") @AuthRequired @POST - public Response recreateToken(@Context ContainerRequestContext crc) { + public Response 
recreateToken(@Context ContainerRequestContext crc, @QueryParam("returnExpiration") boolean returnExpiration) { User u = getRequestUser(crc); AuthenticatedUser au; @@ -174,8 +173,12 @@ public Response recreateToken(@Context ContainerRequestContext crc) { ApiToken newToken = authSvc.generateApiTokenForUser(au); authSvc.save(newToken); - return ok("New token for " + au.getUserIdentifier() + " is " + newToken.getTokenString()); + String message = "New token for " + au.getUserIdentifier() + " is " + newToken.getTokenString(); + if (returnExpiration) { + message += " and expires on " + newToken.getExpireTime(); + } + return ok(message); } @GET diff --git a/src/main/java/edu/harvard/iq/dataverse/api/dto/DataverseDTO.java b/src/main/java/edu/harvard/iq/dataverse/api/dto/DataverseDTO.java new file mode 100644 index 00000000000..4f2f1032c07 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/dto/DataverseDTO.java @@ -0,0 +1,63 @@ +package edu.harvard.iq.dataverse.api.dto; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DataverseContact; + +import java.util.List; + +public class DataverseDTO { + private String alias; + private String name; + private String description; + private String affiliation; + private List dataverseContacts; + private Dataverse.DataverseType dataverseType; + + public String getAlias() { + return alias; + } + + public void setAlias(String alias) { + this.alias = alias; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public String getAffiliation() { + return affiliation; + } + + public void setAffiliation(String affiliation) { + this.affiliation = affiliation; + } + + public List getDataverseContacts() { + return dataverseContacts; + } + + public void setDataverseContacts(List 
dataverseContacts) { + this.dataverseContacts = dataverseContacts; + } + + public Dataverse.DataverseType getDataverseType() { + return dataverseType; + } + + public void setDataverseType(Dataverse.DataverseType dataverseType) { + this.dataverseType = dataverseType; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java index 85d4868605d..35d35316f73 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportDDIServiceBean.java @@ -210,7 +210,7 @@ private void processDDI(ImportType importType, XMLStreamReader xmlr, DatasetDTO // study description section. we'll use the one we found in // the codeBook entry: FieldDTO otherIdValue = FieldDTO.createPrimitiveFieldDTO("otherIdValue", codeBookLevelId); - FieldDTO otherId = FieldDTO.createCompoundFieldDTO("otherId", otherIdValue); + FieldDTO otherId = FieldDTO.createMultipleCompoundFieldDTO("otherId", otherIdValue); citationBlock.getFields().add(otherId); } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index d2bba56f884..ee4609a7c56 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -7,7 +7,6 @@ import com.google.gson.Gson; import com.google.gson.GsonBuilder; -import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetField; import edu.harvard.iq.dataverse.DatasetFieldConstant; @@ -20,6 +19,7 @@ import edu.harvard.iq.dataverse.DataverseContact; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.EjbDataverseEngine; +import edu.harvard.iq.dataverse.GlobalId; import 
edu.harvard.iq.dataverse.MetadataBlockServiceBean; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.imports.ImportUtil.ImportType; @@ -31,6 +31,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestedDatasetCommand; import edu.harvard.iq.dataverse.engine.command.impl.CreateNewDatasetCommand; import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand; +import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestedDatasetCommand; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -40,6 +41,7 @@ import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.license.LicenseServiceBean; import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.util.DatasetFieldUtil; import java.io.File; import java.io.FileOutputStream; @@ -208,7 +210,7 @@ public JsonObjectBuilder handleFile(DataverseRequest dataverseRequest, Dataverse @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, String harvestIdentifier, String metadataFormat, File metadataFile, Date oaiDateStamp, PrintWriter cleanupLog) throws ImportException, IOException { if (harvestingClient == null || harvestingClient.getDataverse() == null) { - throw new ImportException("importHarvestedDataset called wiht a null harvestingClient, or an invalid harvestingClient."); + throw new ImportException("importHarvestedDataset called with a null harvestingClient, or an invalid harvestingClient."); } Dataverse owner = harvestingClient.getDataverse(); Dataset importedDataset = null; @@ -268,116 +270,121 @@ public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Harve } JsonObject obj = JsonUtil.getJsonObject(json); - //and call parse Json to read 
it into a dataset + + String protocol = obj.getString("protocol", null); + String authority = obj.getString("authority", null); + String identifier = obj.getString("identifier",null); + + GlobalId globalId; + + // A Global ID is required: + // (meaning, we will fail with an exception if the imports above have + // not managed to find an acceptable global identifier in the harvested + // metadata) + + try { + globalId = PidUtil.parseAsGlobalID(protocol, authority, identifier); + } catch (IllegalArgumentException iax) { + throw new ImportException("The harvested metadata record with the OAI server identifier " + harvestIdentifier + " does not contain a global identifier this Dataverse can parse, skipping."); + } + + if (globalId == null) { + throw new ImportException("The harvested metadata record with the OAI server identifier " + harvestIdentifier + " does not contain a global identifier this Dataverse recognizes, skipping."); + } + + String globalIdString = globalId.asString(); + + if (StringUtils.isEmpty(globalIdString)) { + // @todo this check may not be necessary, now that there's a null check above + throw new ImportException("The harvested metadata record with the OAI server identifier " + harvestIdentifier + " does not contain a global identifier this Dataverse recognizes, skipping."); + } + + DatasetVersion harvestedVersion; + + Dataset existingDataset = datasetService.findByGlobalId(globalIdString); + try { + Dataset harvestedDataset; + JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService, licenseService, datasetTypeService, harvestingClient); parser.setLenient(true); - Dataset ds = parser.parseDataset(obj); - // For ImportType.NEW, if the metadata contains a global identifier, and it's not a protocol - // we support, it should be rejected. - // (TODO: ! - add some way of keeping track of supported protocols!) 
- //if (ds.getGlobalId() != null && !ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, ""))) { - // throw new ImportException("Could not register id " + ds.getGlobalId() + ", protocol not supported"); - //} - ds.setOwner(owner); - ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields()); + if (existingDataset == null) { + // Creating a new dataset from scratch: + + harvestedDataset = parser.parseDataset(obj); - if (ds.getVersions().get(0).getReleaseTime() == null) { - ds.getVersions().get(0).setReleaseTime(oaiDateStamp); - } - - // Check data against required contraints - List> violations = ds.getVersions().get(0).validateRequired(); - if (!violations.isEmpty()) { - // For migration and harvest, add NA for missing required values - for (ConstraintViolation v : violations) { - DatasetField f = v.getRootBean(); - f.setSingleValue(DatasetField.NA_VALUE); + harvestedDataset.setHarvestedFrom(harvestingClient); + harvestedDataset.setHarvestIdentifier(harvestIdentifier); + + harvestedVersion = harvestedDataset.getVersions().get(0); + } else { + // We already have a dataset with this id in the database. + // Let's check a few things before we go any further with it: + + // If this dataset already exists IN ANOTHER COLLECTION + // we are just going to skip it! 
+ if (existingDataset.getOwner() != null && !owner.getId().equals(existingDataset.getOwner().getId())) { + throw new ImportException("The dataset with the global id " + globalIdString + " already exists, in the dataverse " + existingDataset.getOwner().getAlias() + ", skipping."); } - } - - // Check data against validation constraints - // If we are migrating and "scrub migration data" is true we attempt to fix invalid data - // if the fix fails stop processing of this file by throwing exception - Set invalidViolations = ds.getVersions().get(0).validate(); - ValidatorFactory factory = Validation.buildDefaultValidatorFactory(); - Validator validator = factory.getValidator(); - if (!invalidViolations.isEmpty()) { - for (ConstraintViolation v : invalidViolations) { - DatasetFieldValue f = v.getRootBean(); - boolean fixed = false; - boolean converted = false; - // TODO: Is this scrubbing something we want to continue doing? - if (settingsService.isTrueForKey(SettingsServiceBean.Key.ScrubMigrationData, false)) { - fixed = processMigrationValidationError(f, cleanupLog, metadataFile.getName()); - converted = true; - if (fixed) { - Set> scrubbedViolations = validator.validate(f); - if (!scrubbedViolations.isEmpty()) { - fixed = false; - } - } - } - if (!fixed) { - String msg = "Data modified - File: " + metadataFile.getName() + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; " - + "Invalid value: '" + f.getValue() + "'" + " Converted Value:'" + DatasetField.NA_VALUE + "'"; - cleanupLog.println(msg); - f.setValue(DatasetField.NA_VALUE); - - } + // And if we already have a dataset with this same global id at + // this Dataverse instance, but it is a LOCAL dataset (can happen!), + // we're going to skip it also: + if (!existingDataset.isHarvested()) { + throw new ImportException("A LOCAL dataset with the global id " + globalIdString + " already exists in this dataverse; skipping."); } + // For harvested datasets, there should always only be one 
version. + if (existingDataset.getVersions().size() != 1) { + throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDataset.getVersions().size() + " versions"); + } + + // We will attempt to import the new version, and replace the + // current, already existing version with it. + harvestedVersion = parser.parseDatasetVersion(obj.getJsonObject("datasetVersion")); + + // For the purposes of validation, the version needs to be attached + // to a non-null dataset. We will create a throwaway temporary + // dataset for this: + harvestedDataset = createTemporaryHarvestedDataset(harvestedVersion); } + + harvestedDataset.setOwner(owner); - // A Global ID is required, in order for us to be able to harvest and import - // this dataset: - if (StringUtils.isEmpty(ds.getGlobalId().asString())) { - throw new ImportException("The harvested metadata record with the OAI server identifier "+harvestIdentifier+" does not contain a global unique identifier that we could recognize, skipping."); - } - - ds.setHarvestedFrom(harvestingClient); - ds.setHarvestIdentifier(harvestIdentifier); + // Either a full new import, or an update of an existing harvested + // Dataset, perform some cleanup on the new version imported from the + // parsed metadata: - Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId().asString()); + harvestedVersion.setDatasetFields(harvestedVersion.initDatasetFields()); - if (existingDs != null) { - // If this dataset already exists IN ANOTHER DATAVERSE - // we are just going to skip it! 
- if (existingDs.getOwner() != null && !owner.getId().equals(existingDs.getOwner().getId())) { - throw new ImportException("The dataset with the global id "+ds.getGlobalId().asString()+" already exists, in the dataverse "+existingDs.getOwner().getAlias()+", skipping."); - } - // And if we already have a dataset with this same id, in this same - // dataverse, but it is LOCAL dataset (can happen!), we're going to - // skip it also: - if (!existingDs.isHarvested()) { - throw new ImportException("A LOCAL dataset with the global id "+ds.getGlobalId().asString()+" already exists in this dataverse; skipping."); - } - // For harvested datasets, there should always only be one version. - // We will replace the current version with the imported version. - if (existingDs.getVersions().size() != 1) { - throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDs.getVersions().size() + " versions"); - } - // Purge all the SOLR documents associated with this client from the - // index server: - indexService.deleteHarvestedDocuments(existingDs); - // files from harvested datasets are removed unceremoniously, - // directly in the database. no need to bother calling the - // DeleteFileCommand on them. - for (DataFile harvestedFile : existingDs.getFiles()) { - DataFile merged = em.merge(harvestedFile); - em.remove(merged); - harvestedFile = null; - } - // TODO: - // Verify what happens with the indexed files in SOLR? - // are they going to be overwritten by the reindexing of the dataset? - existingDs.setFiles(null); - Dataset merged = em.merge(existingDs); - // harvested datasets don't have physical files - so no need to worry about that. - engineSvc.submit(new DestroyDatasetCommand(merged, dataverseRequest)); + if (harvestedVersion.getReleaseTime() == null) { + harvestedVersion.setReleaseTime(oaiDateStamp); } + + // Check data against validation constraints. 
+ // Make an attempt to sanitize any invalid fields encountered - + // missing required fields or invalid values, by filling the values + // with NAs. + + boolean sanitized = validateAndSanitizeVersionMetadata(harvestedVersion, cleanupLog); + + // Note: this sanitizing approach, of replacing invalid values with + // "NA" does not work with certain fields. For example, using it to + // populate a GeoBox coordinate value will result in an invalid + // field. So we will attempt to re-validate the sanitized version. + // This time around, it will throw an exception if still invalid, so + // that we'll stop before proceeding any further: - importedDataset = engineSvc.submit(new CreateHarvestedDatasetCommand(ds, dataverseRequest)); + if (sanitized) { + validateVersionMetadata(harvestedVersion, cleanupLog); + } + + DatasetFieldUtil.tidyUpFields(harvestedVersion.getDatasetFields(), true); + + if (existingDataset != null) { + importedDataset = engineSvc.submit(new UpdateHarvestedDatasetCommand(existingDataset, harvestedVersion, dataverseRequest)); + } else { + importedDataset = engineSvc.submit(new CreateHarvestedDatasetCommand(harvestedDataset, dataverseRequest)); + } } catch (JsonParseException | ImportException | CommandException ex) { logger.fine("Failed to import harvested dataset: " + ex.getClass() + ": " + ex.getMessage()); @@ -439,7 +446,7 @@ public JsonObjectBuilder doImport(DataverseRequest dataverseRequest, Dataverse o ds.setOwner(owner); ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields()); - // Check data against required contraints + // Check data against required constraints List> violations = ds.getVersions().get(0).validateRequired(); if (!violations.isEmpty()) { if ( importType.equals(ImportType.HARVEST) ) { @@ -696,6 +703,104 @@ private String convertInvalidDateString(String inString){ return null; } + /** + * A shortcut method for validating AND attempting to sanitize a DatasetVersion + * @param version + * @param cleanupLog -
any invalid values and their replacements are logged there + * @return true if any invalid values were encountered and sanitized + * @throws ImportException (although it should never happen in this mode) + */ + private boolean validateAndSanitizeVersionMetadata(DatasetVersion version, PrintWriter cleanupLog) throws ImportException { + return validateVersionMetadata(version, true, cleanupLog); + } + + /** + * A shortcut method for validating a DatasetVersion; will throw an exception + * if invalid, without attempting to sanitize the invalid values. + * @param version + * @param log - will log the invalid fields encountered there + * @throws ImportException + */ + private void validateVersionMetadata(DatasetVersion version, PrintWriter log) throws ImportException { + validateVersionMetadata(version, false, log); + } + + /** + * Validate the metadata fields of a newly-created version, and depending on + * the "sanitize" flag supplied, may or may not attempt to sanitize the supplied + * values by replacing them with "NA"s. 
+ * @param version + * @param sanitize - boolean indicating whether to attempt to fix invalid values + * @param cleanupLog - any invalid values encountered will be logged there + * @return - true if any invalid values have been replaced + * @throws ImportException + */ + private boolean validateVersionMetadata(DatasetVersion version, boolean sanitize, PrintWriter cleanupLog) throws ImportException { + boolean fixed = false; + Set invalidViolations = version.validate(); + if (!invalidViolations.isEmpty()) { + for (ConstraintViolation v : invalidViolations) { + Object invalid = v.getRootBean(); + String msg = ""; + if (invalid instanceof DatasetField) { + DatasetField f = (DatasetField) invalid; + + msg += "Missing required field: " + f.getDatasetFieldType().getDisplayName() + ";"; + if (sanitize) { + msg += " populated with '" + DatasetField.NA_VALUE + "'"; + f.setSingleValue(DatasetField.NA_VALUE); + fixed = true; + } + } else if (invalid instanceof DatasetFieldValue) { + DatasetFieldValue fv = (DatasetFieldValue) invalid; + + msg += "Invalid metadata field: " + fv.getDatasetField().getDatasetFieldType().getDisplayName() + "; " + + "Invalid value: '" + fv.getValue() + "'"; + if (sanitize) { + msg += ", replaced with '" + DatasetField.NA_VALUE + "'"; + fv.setValue(DatasetField.NA_VALUE); + fixed = true; + } + } else { + // DatasetVersion.validate() can also produce constraint violations + // in TermsOfUse and FileMetadata classes. + // We do not make any attempt to sanitize those. + if (invalid != null) { + msg += "Invalid " + invalid.getClass().getName() + ": " + v.getMessage(); + } + } + cleanupLog.println(msg); + + // Note: "NA" does not work with certain fields. For example, + // using it to populate a GeoBox coordinate value is going + // to result in an invalid field. So we'll need to validate the + // version again after the first, sanitizing pass and see if it + // helped or not.
+ } + if (!sanitize) { + throw new ImportException("Version was still failing validation after the first attempt to sanitize the invalid values."); + } + } + return fixed; + } + + /** + * Helper method that creates a throwaway Harvested Dataset to temporarily + * attach the newly-harvested version to. We need this when, instead of + * importing a brand-new harvested dataset from scratch, we are planning to + * attempt to update an already existing dataset harvested from the same + * archival location. + * @param harvestedVersion - a newly created Version imported from harvested metadata + * @return - a temporary dataset to which the new version has been attached + */ + private Dataset createTemporaryHarvestedDataset(DatasetVersion harvestedVersion) { + Dataset tempDataset = new Dataset(); + harvestedVersion.setDataset(tempDataset); + tempDataset.setVersions(new ArrayList<>(1)); + tempDataset.getVersions().add(harvestedVersion); + + return tempDataset; + } private static class MyCustomFormatter extends Formatter { diff --git a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java index 7fd7bf3e885..a6b7c1b9d49 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/AbstractOAuth2AuthenticationProvider.java @@ -30,7 +30,7 @@ */ public abstract class AbstractOAuth2AuthenticationProvider implements AuthenticationProvider { - final static Logger logger = Logger.getLogger(AbstractOAuth2AuthenticationProvider.class.getName()); + static final Logger logger = Logger.getLogger(AbstractOAuth2AuthenticationProvider.class.getName()); protected static class ParsedUserResponse { public final AuthenticatedUserDisplayInfo displayInfo; diff --git 
a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java index 089ca40e164..323c78ab47a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java +++ b/src/main/java/edu/harvard/iq/dataverse/authorization/providers/oauth2/impl/OrcidOAuth2AP.java @@ -49,7 +49,7 @@ */ public class OrcidOAuth2AP extends AbstractOAuth2AuthenticationProvider { - final static Logger logger = Logger.getLogger(OrcidOAuth2AP.class.getName()); + static final Logger logger = Logger.getLogger(OrcidOAuth2AP.class.getName()); public static final String PROVIDER_ID_PRODUCTION = "orcid"; public static final String PROVIDER_ID_SANDBOX = "orcid-sandbox"; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java index d792b616a0c..4d3ec2842a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/DataverseRequest.java @@ -26,9 +26,9 @@ public class DataverseRequest { private final String invocationId; private final HttpServletRequest httpServletRequest; - private final static String undefined = "0.0.0.0"; + private static final String undefined = "0.0.0.0"; - private final static String MDKEY_PREFIX="mdkey."; + private static final String MDKEY_PREFIX="mdkey."; private static final Logger logger = Logger.getLogger(DataverseRequest.class.getName()); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java index db9dc142506..b36a638956f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractCreateDatasetCommand.java @@ -13,8 +13,10 @@ import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.pidproviders.PidProvider; import static edu.harvard.iq.dataverse.util.StringUtil.isEmpty; +import java.io.IOException; import java.util.Objects; import java.util.logging.Logger; +import org.apache.solr.client.solrj.SolrServerException; /**; * An abstract base class for commands that creates {@link Dataset}s. @@ -148,9 +150,19 @@ public Dataset execute(CommandContext ctxt) throws CommandException { //Use for code that requires database ids postDBFlush(theDataset, ctxt); - - ctxt.index().asyncIndexDataset(theDataset, true); - + + if (harvested) { + try { + ctxt.index().indexDataset(theDataset, true); + } catch (SolrServerException | IOException solrEx) { + logger.warning("Failed to index harvested dataset. " + solrEx.getMessage()); + } + } else { + // The asynchronous version does not throw any exceptions, + // logging them internally instead. 
+ ctxt.index().asyncIndexDataset(theDataset, true); + } + return theDataset; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractWriteDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractWriteDataverseCommand.java new file mode 100644 index 00000000000..40c2abf5d21 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/AbstractWriteDataverseCommand.java @@ -0,0 +1,85 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.*; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; + +import java.util.ArrayList; +import java.util.List; + +/** + * An abstract base class for commands that perform write operations on {@link Dataverse}s. 
+ */ +abstract class AbstractWriteDataverseCommand extends AbstractCommand { + + protected Dataverse dataverse; + private final List inputLevels; + private final List facets; + protected final List metadataBlocks; + + public AbstractWriteDataverseCommand(Dataverse dataverse, + Dataverse affectedDataverse, + DataverseRequest request, + List facets, + List inputLevels, + List metadataBlocks) { + super(request, affectedDataverse); + this.dataverse = dataverse; + if (facets != null) { + this.facets = new ArrayList<>(facets); + } else { + this.facets = null; + } + if (inputLevels != null) { + this.inputLevels = new ArrayList<>(inputLevels); + } else { + this.inputLevels = null; + } + if (metadataBlocks != null) { + this.metadataBlocks = new ArrayList<>(metadataBlocks); + } else { + this.metadataBlocks = null; + } + } + + @Override + public Dataverse execute(CommandContext ctxt) throws CommandException { + dataverse = innerExecute(ctxt); + + if (metadataBlocks != null && !metadataBlocks.isEmpty()) { + dataverse.setMetadataBlockRoot(true); + dataverse.setMetadataBlocks(metadataBlocks); + } + + if (facets != null) { + ctxt.facets().deleteFacetsFor(dataverse); + + if (!facets.isEmpty()) { + dataverse.setFacetRoot(true); + } + + int i = 0; + for (DatasetFieldType df : facets) { + ctxt.facets().create(i++, df, dataverse); + } + } + + if (inputLevels != null) { + if (!inputLevels.isEmpty()) { + dataverse.addInputLevelsMetadataBlocksIfNotPresent(inputLevels); + } + ctxt.fieldTypeInputLevels().deleteFacetsFor(dataverse); + for (DataverseFieldTypeInputLevel inputLevel : inputLevels) { + inputLevel.setDataverse(dataverse); + ctxt.fieldTypeInputLevels().create(inputLevel); + } + } + + return ctxt.dataverses().save(dataverse); + } + + abstract protected Dataverse innerExecute(CommandContext ctxt) throws IllegalCommandException; +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java 
b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java index 489b36e7cef..145cfb6199c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateDataverseCommand.java @@ -6,11 +6,9 @@ import edu.harvard.iq.dataverse.authorization.groups.Group; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.User; -import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -27,48 +25,26 @@ * @author michael */ @RequiredPermissions(Permission.AddDataverse) -public class CreateDataverseCommand extends AbstractCommand { - - private final Dataverse created; - private final List inputLevelList; - private final List facetList; - private final List metadataBlocks; +public class CreateDataverseCommand extends AbstractWriteDataverseCommand { public CreateDataverseCommand(Dataverse created, - DataverseRequest aRequest, - List facetList, - List inputLevelList) { - this(created, aRequest, facetList, inputLevelList, null); + DataverseRequest request, + List facets, + List inputLevels) { + this(created, request, facets, inputLevels, null); } public CreateDataverseCommand(Dataverse created, - DataverseRequest aRequest, - List facetList, - List inputLevelList, + DataverseRequest request, + List facets, + List inputLevels, List metadataBlocks) { - super(aRequest, created.getOwner()); - this.created = created; - if (facetList != null) { - this.facetList = new 
ArrayList<>(facetList); - } else { - this.facetList = null; - } - if (inputLevelList != null) { - this.inputLevelList = new ArrayList<>(inputLevelList); - } else { - this.inputLevelList = null; - } - if (metadataBlocks != null) { - this.metadataBlocks = new ArrayList<>(metadataBlocks); - } else { - this.metadataBlocks = null; - } + super(created, created.getOwner(), request, facets, inputLevels, metadataBlocks); } @Override - public Dataverse execute(CommandContext ctxt) throws CommandException { - - Dataverse owner = created.getOwner(); + protected Dataverse innerExecute(CommandContext ctxt) throws IllegalCommandException { + Dataverse owner = dataverse.getOwner(); if (owner == null) { if (ctxt.dataverses().isRootDataverseExists()) { throw new IllegalCommandException("Root Dataverse already exists. Cannot create another one", this); @@ -76,44 +52,44 @@ public Dataverse execute(CommandContext ctxt) throws CommandException { } if (metadataBlocks != null && !metadataBlocks.isEmpty()) { - created.setMetadataBlockRoot(true); - created.setMetadataBlocks(metadataBlocks); + dataverse.setMetadataBlockRoot(true); + dataverse.setMetadataBlocks(metadataBlocks); } - if (created.getCreateDate() == null) { - created.setCreateDate(new Timestamp(new Date().getTime())); + if (dataverse.getCreateDate() == null) { + dataverse.setCreateDate(new Timestamp(new Date().getTime())); } - if (created.getCreator() == null) { + if (dataverse.getCreator() == null) { final User user = getRequest().getUser(); if (user.isAuthenticated()) { - created.setCreator((AuthenticatedUser) user); + dataverse.setCreator((AuthenticatedUser) user); } else { throw new IllegalCommandException("Guest users cannot create a Dataverse.", this); } } - if (created.getDataverseType() == null) { - created.setDataverseType(Dataverse.DataverseType.UNCATEGORIZED); + if (dataverse.getDataverseType() == null) { + dataverse.setDataverseType(Dataverse.DataverseType.UNCATEGORIZED); } - if (created.getDefaultContributorRole() == 
null) { - created.setDefaultContributorRole(ctxt.roles().findBuiltinRoleByAlias(DataverseRole.EDITOR)); + if (dataverse.getDefaultContributorRole() == null) { + dataverse.setDefaultContributorRole(ctxt.roles().findBuiltinRoleByAlias(DataverseRole.EDITOR)); } // @todo for now we are saying all dataverses are permission root - created.setPermissionRoot(true); + dataverse.setPermissionRoot(true); - if (ctxt.dataverses().findByAlias(created.getAlias()) != null) { - throw new IllegalCommandException("A dataverse with alias " + created.getAlias() + " already exists", this); + if (ctxt.dataverses().findByAlias(dataverse.getAlias()) != null) { + throw new IllegalCommandException("A dataverse with alias " + dataverse.getAlias() + " already exists", this); } - if (created.getFilePIDsEnabled() != null && !ctxt.settings().isTrueForKey(SettingsServiceBean.Key.AllowEnablingFilePIDsPerCollection, false)) { + if (dataverse.getFilePIDsEnabled() != null && !ctxt.settings().isTrueForKey(SettingsServiceBean.Key.AllowEnablingFilePIDsPerCollection, false)) { throw new IllegalCommandException("File PIDs cannot be enabled per collection", this); } // Save the dataverse - Dataverse managedDv = ctxt.dataverses().save(created); + Dataverse managedDv = ctxt.dataverses().save(dataverse); // Find the built in admin role (currently by alias) DataverseRole adminRole = ctxt.roles().findBuiltinRoleByAlias(DataverseRole.ADMIN); @@ -160,33 +136,6 @@ public Dataverse execute(CommandContext ctxt) throws CommandException { } managedDv.setPermissionModificationTime(new Timestamp(new Date().getTime())); - - if (facetList != null) { - ctxt.facets().deleteFacetsFor(managedDv); - - if (!facetList.isEmpty()) { - managedDv.setFacetRoot(true); - } - - int i = 0; - for (DatasetFieldType df : facetList) { - ctxt.facets().create(i++, df, managedDv); - } - } - - if (inputLevelList != null) { - if (!inputLevelList.isEmpty()) { - managedDv.addInputLevelsMetadataBlocksIfNotPresent(inputLevelList); - } - 
ctxt.fieldTypeInputLevels().deleteFacetsFor(managedDv); - for (DataverseFieldTypeInputLevel inputLevel : inputLevelList) { - inputLevel.setDataverse(managedDv); - ctxt.fieldTypeInputLevels().create(inputLevel); - } - } - - // TODO: save is called here and above; we likely don't need both - managedDv = ctxt.dataverses().save(managedDv); return managedDv; } @@ -194,5 +143,4 @@ public Dataverse execute(CommandContext ctxt) throws CommandException { public boolean onSuccess(CommandContext ctxt, Object r) { return ctxt.dataverses().index((Dataverse) r); } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java index 3a21345448b..76939751899 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesCommand.java @@ -2,34 +2,29 @@ import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.authorization.Permission; import edu.harvard.iq.dataverse.datasetutility.FileExceedsMaxSizeException; import edu.harvard.iq.dataverse.datasetutility.FileSizeChecker; -import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; import edu.harvard.iq.dataverse.engine.command.AbstractCommand; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -//import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.CommandExecutionException; import edu.harvard.iq.dataverse.ingest.IngestServiceShapefileHelper; -import edu.harvard.iq.dataverse.Dataverse; import 
edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; -import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.FileUtil; -import static edu.harvard.iq.dataverse.util.FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; -import static edu.harvard.iq.dataverse.util.FileUtil.createIngestFailureReport; -import static edu.harvard.iq.dataverse.util.FileUtil.determineFileType; -import static edu.harvard.iq.dataverse.util.FileUtil.determineFileTypeByNameAndExtension; -import static edu.harvard.iq.dataverse.util.FileUtil.getFilesTempDirectory; -import static edu.harvard.iq.dataverse.util.FileUtil.saveInputStreamInTempFile; -import static edu.harvard.iq.dataverse.util.FileUtil.useRecognizedType; import edu.harvard.iq.dataverse.util.ShapefileHandler; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.file.BagItFileHandler; import edu.harvard.iq.dataverse.util.file.BagItFileHandlerFactory; import edu.harvard.iq.dataverse.util.file.CreateDataFileResult; +import edu.harvard.iq.dataverse.util.file.FileExceedsStorageQuotaException; +import jakarta.enterprise.inject.spi.CDI; +import org.apache.commons.io.FileUtils; +import org.apache.commons.lang3.StringUtils; + import java.io.File; import java.io.FileInputStream; import java.io.IOException; @@ -42,7 +37,7 @@ import java.text.MessageFormat; import java.util.ArrayList; import java.util.Arrays; -import java.util.Enumeration; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -51,12 +46,17 @@ import java.util.Set; import java.util.logging.Logger; import java.util.zip.GZIPInputStream; -import java.util.zip.ZipFile; import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; -import jakarta.enterprise.inject.spi.CDI; -import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.StringUtils; +import java.util.zip.ZipFile; + 
+import static edu.harvard.iq.dataverse.datasetutility.FileSizeChecker.bytesToHumanReadable; +import static edu.harvard.iq.dataverse.util.FileUtil.MIME_TYPE_UNDETERMINED_DEFAULT; +import static edu.harvard.iq.dataverse.util.FileUtil.createIngestFailureReport; +import static edu.harvard.iq.dataverse.util.FileUtil.determineFileType; +import static edu.harvard.iq.dataverse.util.FileUtil.determineFileTypeByNameAndExtension; +import static edu.harvard.iq.dataverse.util.FileUtil.getFilesTempDirectory; +import static edu.harvard.iq.dataverse.util.FileUtil.saveInputStreamInTempFile; +import static edu.harvard.iq.dataverse.util.FileUtil.useRecognizedType; /** * @@ -140,9 +140,10 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException if (newStorageIdentifier == null) { - if (getFilesTempDirectory() != null) { + var filesTempDirectory = getFilesTempDirectory(); + if (filesTempDirectory != null) { try { - tempFile = Files.createTempFile(Paths.get(getFilesTempDirectory()), "tmp", "upload"); + tempFile = Files.createTempFile(Paths.get(filesTempDirectory), "tmp", "upload"); // "temporary" location is the key here; this is why we are not using // the DataStore framework for this - the assumption is that // temp files will always be stored on the local filesystem. @@ -260,10 +261,6 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException // DataFile objects from its contents: } else if (finalType.equals("application/zip")) { - ZipFile zipFile = null; - ZipInputStream unZippedIn = null; - ZipEntry zipEntry = null; - int fileNumberLimit = ctxt.systemConfig().getZipUploadFilesLimit(); Long combinedUnzippedFileSize = 0L; @@ -271,14 +268,14 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException Charset charset = null; /* TODO: (?) - We may want to investigate somehow letting the user specify + We may want to investigate somehow letting the user specify the charset for the filenames in the zip file... 
- - otherwise, ZipInputStream bails out if it encounteres a file - name that's not valid in the current charest (i.e., UTF-8, in - our case). It would be a bit trickier than what we're doing for - SPSS tabular ingests - with the lang. encoding pulldown menu - + - otherwise, ZipInputStream bails out if it encounteres a file + name that's not valid in the current charest (i.e., UTF-8, in + our case). It would be a bit trickier than what we're doing for + SPSS tabular ingests - with the lang. encoding pulldown menu - because this encoding needs to be specified *before* we upload and - attempt to unzip the file. + attempt to unzip the file. -- L.A. 4.0 beta12 logger.info("default charset is "+Charset.defaultCharset().name()); if (Charset.isSupported("US-ASCII")) { @@ -287,25 +284,21 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException if (charset != null) { logger.info("was able to obtain charset for US-ASCII"); } - + } */ - /** - * Perform a quick check for how many individual files are - * inside this zip archive. If it's above the limit, we can - * give up right away, without doing any unpacking. + /** + * Perform a quick check for how many individual files are + * inside this zip archive. If it's above the limit, we can + * give up right away, without doing any unpacking. * This should be a fairly inexpensive operation, we just need - * to read the directory at the end of the file. + * to read the directory at the end of the file. */ - - if (charset != null) { - zipFile = new ZipFile(tempFile.toFile(), charset); - } else { - zipFile = new ZipFile(tempFile.toFile()); - } + + /** - * The ZipFile constructors above will throw ZipException - + * The ZipFile constructors in openZipFile will throw ZipException - * a type of IOException - if there's something wrong * with this file as a zip. 
There's no need to intercept it * here, it will be caught further below, with other IOExceptions, @@ -313,8 +306,8 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException * then attempt to save it as is. */ - int numberOfUnpackableFiles = 0; - + int numberOfUnpackableFiles = 0; + /** * Note that we can't just use zipFile.size(), * unfortunately, since that's the total number of entries, @@ -323,83 +316,46 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException * that are files. */ - for (Enumeration entries = zipFile.entries(); entries.hasMoreElements();) { - ZipEntry entry = entries.nextElement(); - logger.fine("inside first zip pass; this entry: "+entry.getName()); - if (!entry.isDirectory()) { - String shortName = entry.getName().replaceFirst("^.*[\\/]", ""); - // ... and, finally, check if it's a "fake" file - a zip archive entry - // created for a MacOS X filesystem element: (these - // start with "._") - if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) { - numberOfUnpackableFiles++; - if (numberOfUnpackableFiles > fileNumberLimit) { - logger.warning("Zip upload - too many files in the zip to process individually."); - warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit - + "); please upload a zip archive with fewer files, if you want them to be ingested " - + "as individual DataFiles."; - throw new IOException(); - } - // In addition to counting the files, we can - // also check the file size while we're here, - // provided the size limit is defined; if a single - // file is above the individual size limit, unzipped, - // we give up on unpacking this zip archive as well: - if (fileSizeLimit != null && entry.getSize() > fileSizeLimit) { - throw new FileExceedsMaxSizeException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), 
bytesToHumanReadable(fileSizeLimit))); - } - // Similarly, we want to check if saving all these unpacked - // files is going to push the disk usage over the - // quota: - if (storageQuotaLimit != null) { - combinedUnzippedFileSize = combinedUnzippedFileSize + entry.getSize(); - if (combinedUnzippedFileSize > storageQuotaLimit) { - //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(combinedUnzippedFileSize), bytesToHumanReadable(storageQuotaLimit))); - // change of plans: if the unzipped content inside exceeds the remaining quota, - // we reject the upload outright, rather than accepting the zip - // file as is. - throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.unzipped.quota_exceeded"), bytesToHumanReadable(storageQuotaLimit)), this); - } + try (var zipFile = openZipFile(tempFile, charset)) { + var zipEntries = filteredZipEntries(zipFile); + for (var entry : zipEntries) { + logger.fine("inside first zip pass; this entry: " + entry.getName()); + numberOfUnpackableFiles++; + if (numberOfUnpackableFiles > fileNumberLimit) { + logger.warning("Zip upload - too many files in the zip to process individually."); + warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit + + "); please upload a zip archive with fewer files, if you want them to be ingested " + + "as individual DataFiles."; + throw new IOException(); + } + // In addition to counting the files, we can + // also check the file size while we're here, + // provided the size limit is defined; if a single + // file is above the individual size limit, unzipped, + // we give up on unpacking this zip archive as well: + if (fileSizeLimit != null && entry.getSize() > fileSizeLimit) { + throw new 
FileExceedsMaxSizeException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.file_exceeds_limit"), bytesToHumanReadable(entry.getSize()), bytesToHumanReadable(fileSizeLimit))); + } + // Similarly, we want to check if saving all these unpacked + // files is going to push the disk usage over the + // quota: + if (storageQuotaLimit != null) { + combinedUnzippedFileSize = combinedUnzippedFileSize + entry.getSize(); + if (combinedUnzippedFileSize > storageQuotaLimit) { + //throw new FileExceedsStorageQuotaException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.quota_exceeded"), bytesToHumanReadable(combinedUnzippedFileSize), bytesToHumanReadable(storageQuotaLimit))); + // change of plans: if the unzipped content inside exceeds the remaining quota, + // we reject the upload outright, rather than accepting the zip + // file as is. + throw new CommandExecutionException(MessageFormat.format(BundleUtil.getStringFromBundle("file.addreplace.error.unzipped.quota_exceeded"), bytesToHumanReadable(storageQuotaLimit)), this); } } } - } - - // OK we're still here - that means we can proceed unzipping. - - // Close the ZipFile, re-open as ZipInputStream: - zipFile.close(); - // reset: - combinedUnzippedFileSize = 0L; - - if (charset != null) { - unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile()), charset); - } else { - unZippedIn = new ZipInputStream(new FileInputStream(tempFile.toFile())); - } - - while (true) { - try { - zipEntry = unZippedIn.getNextEntry(); - } catch (IllegalArgumentException iaex) { - // Note: - // ZipInputStream documentation doesn't even mention that - // getNextEntry() throws an IllegalArgumentException! - // but that's what happens if the file name of the next - // entry is not valid in the current CharSet. - // -- L.A. - warningMessage = "Failed to unpack Zip file. (Unknown Character Set used in a file name?) 
Saving the file as is."; - logger.warning(warningMessage); - throw new IOException(); - } + // OK we're still here - that means we can proceed unzipping. - if (zipEntry == null) { - break; - } - // Note that some zip entries may be directories - we - // simply skip them: + // reset: + combinedUnzippedFileSize = 0L; - if (!zipEntry.isDirectory()) { + for (var entry : zipEntries) { if (datafiles.size() > fileNumberLimit) { logger.warning("Zip upload - too many files."); warningMessage = "The number of files in the zip archive is over the limit (" + fileNumberLimit @@ -407,72 +363,55 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException + "as individual DataFiles."; throw new IOException(); } - - String fileEntryName = zipEntry.getName(); + var fileEntryName = entry.getName(); + var shortName = getShortName(fileEntryName); logger.fine("ZipEntry, file: " + fileEntryName); + String storageIdentifier = FileUtil.generateStorageIdentifier(); + File unzippedFile = new File(getFilesTempDirectory() + "/" + storageIdentifier); + Files.copy(zipFile.getInputStream(entry), unzippedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); + // No need to check the size of this unpacked file against the size limit, + // since we've already checked for that in the first pass. + DataFile datafile = FileUtil.createSingleDataFile(version, null, storageIdentifier, shortName, + MIME_TYPE_UNDETERMINED_DEFAULT, + ctxt.systemConfig().getFileFixityChecksumAlgorithm(), null, false); + + if (!fileEntryName.equals(shortName)) { + // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes), + // we'll extract the directory name; then subject it to some "aggressive sanitizing" - strip all + // the leading, trailing and duplicate slashes; then replace all the characters that + // don't pass our validation rules. 
+ String directoryName = fileEntryName.replaceFirst("[\\\\/][\\\\/]*[^\\\\/]*$", ""); + directoryName = StringUtil.sanitizeFileDirectory(directoryName, true); + // if (!"".equals(directoryName)) { + if (!StringUtil.isEmpty(directoryName)) { + logger.fine("setting the directory label to " + directoryName); + datafile.getFileMetadata().setDirectoryLabel(directoryName); + } + } - if (fileEntryName != null && !fileEntryName.equals("")) { - - String shortName = fileEntryName.replaceFirst("^.*[\\/]", ""); - - // Check if it's a "fake" file - a zip archive entry - // created for a MacOS X filesystem element: (these - // start with "._") - if (!shortName.startsWith("._") && !shortName.startsWith(".DS_Store") && !"".equals(shortName)) { - // OK, this seems like an OK file entry - we'll try - // to read it and create a DataFile with it: - - String storageIdentifier = FileUtil.generateStorageIdentifier(); - File unzippedFile = new File(getFilesTempDirectory() + "/" + storageIdentifier); - Files.copy(unZippedIn, unzippedFile.toPath(), StandardCopyOption.REPLACE_EXISTING); - // No need to check the size of this unpacked file against the size limit, - // since we've already checked for that in the first pass. - - DataFile datafile = FileUtil.createSingleDataFile(version, null, storageIdentifier, shortName, - MIME_TYPE_UNDETERMINED_DEFAULT, - ctxt.systemConfig().getFileFixityChecksumAlgorithm(), null, false); - - if (!fileEntryName.equals(shortName)) { - // If the filename looks like a hierarchical folder name (i.e., contains slashes and backslashes), - // we'll extract the directory name; then subject it to some "aggressive sanitizing" - strip all - // the leading, trailing and duplicate slashes; then replace all the characters that - // don't pass our validation rules. 
- String directoryName = fileEntryName.replaceFirst("[\\\\/][\\\\/]*[^\\\\/]*$", ""); - directoryName = StringUtil.sanitizeFileDirectory(directoryName, true); - // if (!"".equals(directoryName)) { - if (!StringUtil.isEmpty(directoryName)) { - logger.fine("setting the directory label to " + directoryName); - datafile.getFileMetadata().setDirectoryLabel(directoryName); - } - } + if (datafile != null) { + // We have created this datafile with the mime type "unknown"; + // Now that we have it saved in a temporary location, + // let's try and determine its real type: + + String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(); - if (datafile != null) { - // We have created this datafile with the mime type "unknown"; - // Now that we have it saved in a temporary location, - // let's try and determine its real type: - - String tempFileName = getFilesTempDirectory() + "/" + datafile.getStorageIdentifier(); - - try { - recognizedType = determineFileType(unzippedFile, shortName); - // null the File explicitly, to release any open FDs: - unzippedFile = null; - logger.fine("File utility recognized unzipped file as " + recognizedType); - if (recognizedType != null && !recognizedType.equals("")) { - datafile.setContentType(recognizedType); - } - } catch (Exception ex) { - logger.warning("Failed to run the file utility mime type check on file " + fileName); - } - - datafiles.add(datafile); - combinedUnzippedFileSize += datafile.getFilesize(); + try { + recognizedType = determineFileType(unzippedFile, shortName); + // null the File explicitly, to release any open FDs: + unzippedFile = null; + logger.fine("File utility recognized unzipped file as " + recognizedType); + if (recognizedType != null && !recognizedType.equals("")) { + datafile.setContentType(recognizedType); } + } catch (Exception ex) { + logger.warning("Failed to run the file utility mime type check on file " + fileName); } + + datafiles.add(datafile); + combinedUnzippedFileSize += 
datafile.getFilesize(); } } - unZippedIn.closeEntry(); - } } catch (IOException ioex) { @@ -494,18 +433,7 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException //warningMessage = BundleUtil.getStringFromBundle("file.addreplace.warning.unzip.failed.quota", Arrays.asList(FileSizeChecker.bytesToHumanReadable(storageQuotaLimit))); //datafiles.clear(); throw new CommandExecutionException(fesqx.getMessage(), fesqx, this); - }*/ finally { - if (zipFile != null) { - try { - zipFile.close(); - } catch (Exception zEx) {} - } - if (unZippedIn != null) { - try { - unZippedIn.close(); - } catch (Exception zEx) {} - } - } + }*/ if (!datafiles.isEmpty()) { // remove the uploaded zip file: try { @@ -591,7 +519,8 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException // The try-catch is due to error encountered in using NFS for stocking file, // cf. https://github.com/IQSS/dataverse/issues/5909 try { - FileUtils.deleteDirectory(rezipFolder); + if (rezipFolder!=null) + FileUtils.deleteDirectory(rezipFolder); } catch (IOException ioex) { // do nothing - it's a temp folder. 
logger.warning("Could not remove temp folder, error message : " + ioex.getMessage()); @@ -730,7 +659,37 @@ public CreateDataFileResult execute(CommandContext ctxt) throws CommandException return CreateDataFileResult.error(fileName, finalType); } // end createDataFiles - + + private static List filteredZipEntries(ZipFile zipFile) { + var entries = Collections.list(zipFile.entries()).stream().filter(e -> { + var entryName = e.getName(); + logger.fine("ZipEntry, file: " + entryName); + return !e.isDirectory() && !entryName.isEmpty() && !isFileToSkip(entryName); + }).toList(); + return entries; + } + + private static ZipFile openZipFile(Path tempFile, Charset charset) throws IOException { + if (charset != null) { + return new ZipFile(tempFile.toFile(), charset); + } + else { + return new ZipFile(tempFile.toFile()); + } + } + + private static boolean isFileToSkip(String fileName) { + // check if it's a "fake" file - a zip archive entry + // created for a MacOS X filesystem element: (these + // start with "._") + var shortName = getShortName(fileName); + return shortName.startsWith("._") || shortName.startsWith(".DS_Store") || "".equals(shortName); + } + + private static String getShortName(String fileName) { + return fileName.replaceFirst("^.*[\\/]", ""); + } + @Override public Map> getRequiredPermissions() { Map> ret = new HashMap<>(); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java index bb5f5a71e24..dc8884405ef 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDatasetVersionCommand.java @@ -115,7 +115,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { */ if(persistedVersion==null) { Long id = getDataset().getLatestVersion().getId(); - persistedVersion = 
ctxt.datasetVersion().find(id!=null ? id: getDataset().getLatestVersionForCopy().getId()); + persistedVersion = ctxt.datasetVersion().find(id!=null ? id : getDataset().getLatestVersionForCopy(true).getId()); } //Will throw an IllegalCommandException if a system metadatablock is changed and the appropriate key is not supplied. diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseAttributeCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseAttributeCommand.java new file mode 100644 index 00000000000..57ac20fcee6 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseAttributeCommand.java @@ -0,0 +1,110 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.engine.command.exception.PermissionException; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; + +import java.util.Collections; + +/** + * Command to update an existing Dataverse attribute. 
+ */ +@RequiredPermissions(Permission.EditDataverse) +public class UpdateDataverseAttributeCommand extends AbstractCommand { + + private static final String ATTRIBUTE_ALIAS = "alias"; + private static final String ATTRIBUTE_NAME = "name"; + private static final String ATTRIBUTE_DESCRIPTION = "description"; + private static final String ATTRIBUTE_AFFILIATION = "affiliation"; + private static final String ATTRIBUTE_FILE_PIDS_ENABLED = "filePIDsEnabled"; + + private final Dataverse dataverse; + private final String attributeName; + private final Object attributeValue; + + public UpdateDataverseAttributeCommand(DataverseRequest request, Dataverse dataverse, String attributeName, Object attributeValue) { + super(request, dataverse); + this.dataverse = dataverse; + this.attributeName = attributeName; + this.attributeValue = attributeValue; + } + + @Override + public Dataverse execute(CommandContext ctxt) throws CommandException { + switch (attributeName) { + case ATTRIBUTE_ALIAS: + case ATTRIBUTE_NAME: + case ATTRIBUTE_DESCRIPTION: + case ATTRIBUTE_AFFILIATION: + setStringAttribute(attributeName, attributeValue); + break; + case ATTRIBUTE_FILE_PIDS_ENABLED: + setBooleanAttributeForFilePIDs(ctxt); + break; + default: + throw new IllegalCommandException("'" + attributeName + "' is not a supported attribute", this); + } + + return ctxt.engine().submit(new UpdateDataverseCommand(dataverse, null, null, getRequest(), null)); + } + + /** + * Helper method to set a string attribute. + * + * @param attributeName The name of the attribute. + * @param attributeValue The value of the attribute (must be a String). + * @throws IllegalCommandException if the provided attribute value is not of String type. 
+ */ + private void setStringAttribute(String attributeName, Object attributeValue) throws IllegalCommandException { + if (!(attributeValue instanceof String stringValue)) { + throw new IllegalCommandException("'" + attributeName + "' requires a string value", this); + } + + switch (attributeName) { + case ATTRIBUTE_ALIAS: + dataverse.setAlias(stringValue); + break; + case ATTRIBUTE_NAME: + dataverse.setName(stringValue); + break; + case ATTRIBUTE_DESCRIPTION: + dataverse.setDescription(stringValue); + break; + case ATTRIBUTE_AFFILIATION: + dataverse.setAffiliation(stringValue); + break; + default: + throw new IllegalCommandException("Unsupported string attribute: " + attributeName, this); + } + } + + /** + * Helper method to handle the "filePIDsEnabled" boolean attribute. + * + * @param ctxt The command context. + * @throws PermissionException if the user doesn't have permission to modify this attribute. + */ + private void setBooleanAttributeForFilePIDs(CommandContext ctxt) throws CommandException { + if (!getRequest().getUser().isSuperuser()) { + throw new PermissionException("You must be a superuser to change this setting", + this, Collections.singleton(Permission.EditDataset), dataverse); + } + if (!ctxt.settings().isTrueForKey(SettingsServiceBean.Key.AllowEnablingFilePIDsPerCollection, false)) { + throw new PermissionException("Changing File PID policy per collection is not enabled on this server", + this, Collections.singleton(Permission.EditDataset), dataverse); + } + + if (!(attributeValue instanceof Boolean)) { + throw new IllegalCommandException("'" + ATTRIBUTE_FILE_PIDS_ENABLED + "' requires a boolean value", this); + } + + dataverse.setFilePIDsEnabled((Boolean) attributeValue); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java index bdb69dc918f..55cc3708097 100644 --- 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateDataverseCommand.java @@ -1,141 +1,143 @@ package edu.harvard.iq.dataverse.engine.command.impl; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetFieldType; -import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.Dataverse.DataverseType; -import edu.harvard.iq.dataverse.DataverseFieldTypeInputLevel; +import edu.harvard.iq.dataverse.api.dto.DataverseDTO; import edu.harvard.iq.dataverse.authorization.Permission; import static edu.harvard.iq.dataverse.dataverse.DataverseUtil.validateDataverseMetadataExternally; -import edu.harvard.iq.dataverse.engine.command.AbstractCommand; + import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; import java.util.ArrayList; import java.util.List; -import java.util.logging.Logger; /** * Update an existing dataverse. 
+ * * @author michael */ -@RequiredPermissions( Permission.EditDataverse ) -public class UpdateDataverseCommand extends AbstractCommand { - private static final Logger logger = Logger.getLogger(UpdateDataverseCommand.class.getName()); - - private final Dataverse editedDv; - private final List facetList; +@RequiredPermissions(Permission.EditDataverse) +public class UpdateDataverseCommand extends AbstractWriteDataverseCommand { private final List featuredDataverseList; - private final List inputLevelList; + private final DataverseDTO updatedDataverseDTO; private boolean datasetsReindexRequired = false; - public UpdateDataverseCommand(Dataverse editedDv, List facetList, List featuredDataverseList, - DataverseRequest aRequest, List inputLevelList ) { - super(aRequest, editedDv); - this.editedDv = editedDv; - // add update template uses this command but does not - // update facet list or featured dataverses - if (facetList != null){ - this.facetList = new ArrayList<>(facetList); - } else { - this.facetList = null; - } - if (featuredDataverseList != null){ - this.featuredDataverseList = new ArrayList<>(featuredDataverseList); - } else { - this.featuredDataverseList = null; - } - if (inputLevelList != null){ - this.inputLevelList = new ArrayList<>(inputLevelList); - } else { - this.inputLevelList = null; - } - } - - @Override - public Dataverse execute(CommandContext ctxt) throws CommandException { - logger.fine("Entering update dataverse command"); - - // Perform any optional validation steps, if defined: - if (ctxt.systemConfig().isExternalDataverseValidationEnabled()) { - // For admins, an override of the external validation step may be enabled: - if (!(getUser().isSuperuser() && ctxt.systemConfig().isExternalValidationAdminOverrideEnabled())) { - String executable = ctxt.systemConfig().getDataverseValidationExecutable(); - boolean result = validateDataverseMetadataExternally(editedDv, executable, getRequest()); - - if (!result) { - String rejectionMessage = 
ctxt.systemConfig().getDataverseUpdateValidationFailureMsg(); - throw new IllegalCommandException(rejectionMessage, this); - } - } - } - - Dataverse oldDv = ctxt.dataverses().find(editedDv.getId()); - - DataverseType oldDvType = oldDv.getDataverseType(); - String oldDvAlias = oldDv.getAlias(); - String oldDvName = oldDv.getName(); - oldDv = null; - - Dataverse result = ctxt.dataverses().save(editedDv); - - if ( facetList != null ) { - ctxt.facets().deleteFacetsFor(result); - int i=0; - for ( DatasetFieldType df : facetList ) { - ctxt.facets().create(i++, df.getId(), result.getId()); - } - } - if ( featuredDataverseList != null ) { - ctxt.featuredDataverses().deleteFeaturedDataversesFor(result); - int i=0; - for ( Object obj : featuredDataverseList ) { - Dataverse dv = (Dataverse) obj; - ctxt.featuredDataverses().create(i++, dv.getId(), result.getId()); + public UpdateDataverseCommand(Dataverse dataverse, + List facets, + List featuredDataverses, + DataverseRequest request, + List inputLevels) { + this(dataverse, facets, featuredDataverses, request, inputLevels, null, null); + } + + public UpdateDataverseCommand(Dataverse dataverse, + List facets, + List featuredDataverses, + DataverseRequest request, + List inputLevels, + List metadataBlocks, + DataverseDTO updatedDataverseDTO) { + super(dataverse, dataverse, request, facets, inputLevels, metadataBlocks); + if (featuredDataverses != null) { + this.featuredDataverseList = new ArrayList<>(featuredDataverses); + } else { + this.featuredDataverseList = null; + } + this.updatedDataverseDTO = updatedDataverseDTO; + } + + @Override + protected Dataverse innerExecute(CommandContext ctxt) throws IllegalCommandException { + // Perform any optional validation steps, if defined: + if (ctxt.systemConfig().isExternalDataverseValidationEnabled()) { + // For admins, an override of the external validation step may be enabled: + if (!(getUser().isSuperuser() && ctxt.systemConfig().isExternalValidationAdminOverrideEnabled())) { + 
String executable = ctxt.systemConfig().getDataverseValidationExecutable(); + boolean result = validateDataverseMetadataExternally(dataverse, executable, getRequest()); + + if (!result) { + String rejectionMessage = ctxt.systemConfig().getDataverseUpdateValidationFailureMsg(); + throw new IllegalCommandException(rejectionMessage, this); } } - if ( inputLevelList != null ) { - ctxt.fieldTypeInputLevels().deleteFacetsFor(result); - for ( DataverseFieldTypeInputLevel obj : inputLevelList ) { - ctxt.fieldTypeInputLevels().create(obj); - } + } + + Dataverse oldDv = ctxt.dataverses().find(dataverse.getId()); + + DataverseType oldDvType = oldDv.getDataverseType(); + String oldDvAlias = oldDv.getAlias(); + String oldDvName = oldDv.getName(); + + if (updatedDataverseDTO != null) { // apply the DTO before the reindex check so that check sees the updated alias/name/type + updateDataverseFromDTO(dataverse, updatedDataverseDTO); + } + + // We don't want to reindex the children datasets unnecessarily: + // When these values are changed we need to reindex all children datasets + // This check is not recursive as all the values just report the immediate parent + if (!oldDvType.equals(dataverse.getDataverseType()) + || !oldDvName.equals(dataverse.getName()) + || !oldDvAlias.equals(dataverse.getAlias())) { + datasetsReindexRequired = true; + } + + if (featuredDataverseList != null) { + ctxt.featuredDataverses().deleteFeaturedDataversesFor(dataverse); + int i = 0; + for (Object obj : featuredDataverseList) { + Dataverse dv = (Dataverse) obj; + ctxt.featuredDataverses().create(i++, dv.getId(), dataverse.getId()); } - - // We don't want to reindex the children datasets unnecessarily: - // When these values are changed we need to reindex all children datasets - // This check is not recursive as all the values just report the immediate parent - if (!oldDvType.equals(editedDv.getDataverseType()) - || !oldDvName.equals(editedDv.getName()) - || !oldDvAlias.equals(editedDv.getAlias())) { - datasetsReindexRequired = true; + } + + return dataverse; + } + + private void
updateDataverseFromDTO(Dataverse dataverse, DataverseDTO dto) { + if (dto.getAlias() != null) { + dataverse.setAlias(dto.getAlias()); + } + if (dto.getName() != null) { + dataverse.setName(dto.getName()); + } + if (dto.getDescription() != null) { + dataverse.setDescription(dto.getDescription()); + } + if (dto.getAffiliation() != null) { + dataverse.setAffiliation(dto.getAffiliation()); + } + if (dto.getDataverseContacts() != null) { + dataverse.setDataverseContacts(dto.getDataverseContacts()); + for (DataverseContact dc : dataverse.getDataverseContacts()) { + dc.setDataverse(dataverse); } - - return result; - } - + } + if (dto.getDataverseType() != null) { + dataverse.setDataverseType(dto.getDataverseType()); + } + } + @Override public boolean onSuccess(CommandContext ctxt, Object r) { - + // first kick of async index of datasets // TODO: is this actually needed? Is there a better way to handle // It appears that we at some point lost some extra logic here, where // we only reindex the underlying datasets if one or more of the specific set - // of fields have been changed (since these values are included in the - // indexed solr documents for dataasets). So I'm putting that back. -L.A. + // of fields have been changed (since these values are included in the + // indexed solr documents for datasets). So I'm putting that back. -L.A. 
Dataverse result = (Dataverse) r; - + if (datasetsReindexRequired) { List datasets = ctxt.datasets().findByOwnerId(result.getId()); ctxt.index().asyncIndexDatasetList(datasets, true); } - - return ctxt.dataverses().index((Dataverse) r); - } + return ctxt.dataverses().index((Dataverse) r); + } } - diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestedDatasetCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestedDatasetCommand.java new file mode 100644 index 00000000000..09563686299 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestedDatasetCommand.java @@ -0,0 +1,202 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.FileMetadata; +import static edu.harvard.iq.dataverse.search.IndexServiceBean.solrDocIdentifierFile; +import edu.harvard.iq.dataverse.util.StringUtil; +import java.io.IOException; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.logging.Level; +import java.util.logging.Logger; +import org.apache.solr.client.solrj.SolrServerException; + +/** + * + * @author landreev + * + * Much simplified version of UpdateDatasetVersionCommand, + * but with some extra twists. 
The goal is to avoid creating new Dataset and + * DataFile objects, and to instead preserve the database ids of the re-harvested + * datasets and files, whenever possible. This in turn allows us to avoid deleting + * and rebuilding from scratch the Solr documents for these objects. + */ +@RequiredPermissions(Permission.EditDataset) +public class UpdateHarvestedDatasetCommand extends AbstractDatasetCommand { + + private static final Logger logger = Logger.getLogger(UpdateHarvestedDatasetCommand.class.getCanonicalName()); + private final DatasetVersion newHarvestedVersion; + final private boolean validateLenient = true; + + public UpdateHarvestedDatasetCommand(Dataset theDataset, DatasetVersion newHarvestedVersion, DataverseRequest aRequest) { + super(aRequest, theDataset); + this.newHarvestedVersion = newHarvestedVersion; + } + + public boolean isValidateLenient() { + return validateLenient; + } + + @Override + public Dataset execute(CommandContext ctxt) throws CommandException { + + Dataset existingDataset = getDataset(); + + if (existingDataset == null + || existingDataset.getId() == null + || !existingDataset.isHarvested() + || existingDataset.getVersions().size() != 1) { + throw new IllegalCommandException("The command can only be called on an existing harvested dataset with only 1 version", this); + } + DatasetVersion existingVersion = existingDataset.getVersions().get(0); + + if (newHarvestedVersion == null || newHarvestedVersion.getId() != null) { + throw new IllegalCommandException("The command can only be called with a newly-harvested, not yet saved DatasetVersion supplied", this); + } + + newHarvestedVersion.setCreateTime(getTimestamp()); + newHarvestedVersion.setLastUpdateTime(getTimestamp()); + + + Map existingFilesIndex = new HashMap<>(); + + /* + Create a map of the files that are currently part of this existing + harvested dataset. We assume that a harvested file can be uniquely + defined by its storageidentifier. 
Which, in the case of a datafile + harvested from another Dataverse should be its data access api url. + */ + for (int i = 0; i < existingDataset.getFiles().size(); i++) { + String storageIdentifier = existingDataset.getFiles().get(i).getStorageIdentifier(); + if (!StringUtil.isEmpty(storageIdentifier)) { + existingFilesIndex.put(storageIdentifier, i); + } + } + + /* + Go through the files in the newly-harvested version and check if any of + them are (potentially new/updated) versions of files that we already + have, harvested previously from the same archive location. + */ + for (FileMetadata newFileMetadata : newHarvestedVersion.getFileMetadatas()) { + // is it safe to assume that each new FileMetadata will be + // pointing to a non-null DataFile here? + String storageIdentifier = newFileMetadata.getDataFile().getStorageIdentifier(); + if (!StringUtil.isEmpty(storageIdentifier) && existingFilesIndex.containsKey(storageIdentifier)) { + newFileMetadata.getDataFile().setFileMetadatas(new ArrayList<>()); + + int fileIndex = existingFilesIndex.get(storageIdentifier); + + // Make sure to update the existing DataFiles that we are going + // to keep in case their newly-harvested versions have different + // checksums, mime types etc. 
These values are supposed to be + // immutable, normally - but who knows, errors happen, the source + // Dataverse may have had to fix these in their database to + // correct a data integrity issue (for ex.): + existingDataset.getFiles().get(fileIndex).setContentType(newFileMetadata.getDataFile().getContentType()); + existingDataset.getFiles().get(fileIndex).setFilesize(newFileMetadata.getDataFile().getFilesize()); + existingDataset.getFiles().get(fileIndex).setChecksumType(newFileMetadata.getDataFile().getChecksumType()); + existingDataset.getFiles().get(fileIndex).setChecksumValue(newFileMetadata.getDataFile().getChecksumValue()); + + // Point the newly-harvested filemetadata to the existing datafile: + newFileMetadata.setDataFile(existingDataset.getFiles().get(fileIndex)); + + // Make sure this new FileMetadata is the only one attached to this existing file: + existingDataset.getFiles().get(fileIndex).setFileMetadatas(new ArrayList<>(1)); + existingDataset.getFiles().get(fileIndex).getFileMetadatas().add(newFileMetadata); + // (we don't want any cascade relationships left between this existing + // dataset and this version, since we are going to attempt to delete it). + + // Drop the file from the index map: + existingFilesIndex.remove(storageIdentifier); + } + } + + // @todo? check if there's anything special that needs to be done for things + // like file categories + + List solrIdsOfDocumentsToDelete = new ArrayList<>(); + + // Go through the existing files and delete the ones that are + // no longer present in the version that we have just harvested: + for (FileMetadata oldFileMetadata : existingDataset.getVersions().get(0).getFileMetadatas()) { + DataFile oldDataFile = oldFileMetadata.getDataFile(); + String storageIdentifier = oldDataFile.getStorageIdentifier(); + // Is it still in the existing files map?
- that means it is no longer + // present in the newly-harvested version + if (StringUtil.isEmpty(storageIdentifier) || existingFilesIndex.containsKey(storageIdentifier)) { + solrIdsOfDocumentsToDelete.add(solrDocIdentifierFile + oldDataFile.getId()); + existingDataset.getFiles().remove(oldDataFile); + // Files from harvested datasets are removed unceremoniously, + // directly in the database. No need to bother calling the + // DeleteFileCommand on them. We'll just need to remember to purge + // them from Solr as well (right below) + ctxt.em().remove(ctxt.em().merge(oldDataFile)); + // (no need to explicitly remove the oldFileMetadata; it will be + // removed when the entire old version is deleted) + } + } + + // purge all the SOLR documents associated with the files + // we have just deleted: + if (!solrIdsOfDocumentsToDelete.isEmpty()) { + ctxt.index().deleteHarvestedDocuments(solrIdsOfDocumentsToDelete); + } + + // ... And now delete the existing version itself: + existingDataset.setVersions(new ArrayList<>()); + existingVersion.setDataset(null); + + existingVersion = ctxt.em().merge(existingVersion); + ctxt.em().remove(existingVersion); + + // Now attach the newly-harvested version to the dataset: + existingDataset.getVersions().add(newHarvestedVersion); + newHarvestedVersion.setDataset(existingDataset); + + // ...
There's one more thing to do - go through the new files, + // that are not in the database yet, and make sure they are + // attached to this existing dataset, instead of the dummy temp + // dataset into which they were originally imported: + for (FileMetadata newFileMetadata : newHarvestedVersion.getFileMetadatas()) { + if (newFileMetadata.getDataFile().getId() == null) { + existingDataset.getFiles().add(newFileMetadata.getDataFile()); + newFileMetadata.getDataFile().setOwner(existingDataset); + } + } + + ctxt.em().persist(newHarvestedVersion); + + Dataset savedDataset = ctxt.em().merge(existingDataset); + ctxt.em().flush(); + + return savedDataset; + } + + @Override + public boolean onSuccess(CommandContext ctxt, Object r) { + boolean retVal = true; + Dataset d = (Dataset) r; + + try { + // Note that we index harvested datasets synchronously: + ctxt.index().indexDataset(d, true); + } catch (SolrServerException|IOException solrServerEx) { // log the exception itself, not just its message, so the cause and stack trace are kept + logger.log(Level.WARNING, "Exception while trying to index the updated Harvested dataset " + d.getGlobalId().asString(), solrServerEx); + retVal = false; + } + + return retVal; + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java index edd01ae98a3..d76020cb8d8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/DDIExportServiceBean.java @@ -98,8 +98,10 @@ public class DDIExportServiceBean { public static final String LEVEL_FILE = "file"; public static final String NOTE_TYPE_UNF = "VDC:UNF"; public static final String NOTE_TYPE_TAG = "DATAVERSE:TAG"; + public static final String NOTE_TYPE_FILEDESCRIPTION = "DATAVERSE:FILEDESC"; public static final String NOTE_SUBJECT_UNF = "Universal Numeric Fingerprint"; public static final String NOTE_SUBJECT_TAG = "Data File Tag"; + public static final String
NOTE_SUBJECT_FILEDESCRIPTION = "DataFile Description"; /* * Internal service objects: @@ -742,11 +744,6 @@ private void createFileDscr(XMLStreamWriter xmlw, Set excludedFieldSet, xmlw.writeEndElement(); // fileName } - /* - xmlw.writeStartElement("fileCont"); - xmlw.writeCharacters( df.getContentType() ); - xmlw.writeEndElement(); // fileCont - */ // dimensions if (checkField("dimensns", excludedFieldSet, includedFieldSet)) { if (dt.getCaseQuantity() != null || dt.getVarQuantity() != null || dt.getRecordsPerCase() != null) { @@ -801,26 +798,6 @@ private void createFileDscr(XMLStreamWriter xmlw, Set excludedFieldSet, xmlw.writeEndElement(); // notes } - /* - xmlw.writeStartElement("notes"); - writeAttribute( xmlw, "type", "vdc:category" ); - xmlw.writeCharacters( fm.getCategory() ); - xmlw.writeEndElement(); // notes - */ - // A special note for LOCKSS crawlers indicating the restricted - // status of the file: - - /* - if (tdf != null && isRestrictedFile(tdf)) { - xmlw.writeStartElement("notes"); - writeAttribute( xmlw, "type", NOTE_TYPE_LOCKSS_CRAWL ); - writeAttribute( xmlw, "level", LEVEL_FILE ); - writeAttribute( xmlw, "subject", NOTE_SUBJECT_LOCKSS_PERM ); - xmlw.writeCharacters( "restricted" ); - xmlw.writeEndElement(); // notes - - } - */ if (checkField("tags", excludedFieldSet, includedFieldSet) && df.getTags() != null) { for (int i = 0; i < df.getTags().size(); i++) { xmlw.writeStartElement("notes"); @@ -831,6 +808,17 @@ private void createFileDscr(XMLStreamWriter xmlw, Set excludedFieldSet, xmlw.writeEndElement(); // notes } } + + // A dedicated node for the Description entry + if (!StringUtilisEmpty(fm.getDescription())) { + xmlw.writeStartElement("notes"); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_FILEDESCRIPTION); + xmlw.writeAttribute("subject", NOTE_SUBJECT_FILEDESCRIPTION); + xmlw.writeCharacters(fm.getDescription()); + xmlw.writeEndElement(); // notes + } + xmlw.writeEndElement(); // fileDscr } diff --git 
a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index f5efc448090..05ddbe83e78 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -14,8 +14,10 @@ import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.LEVEL_FILE; import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_SUBJECT_TAG; import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_SUBJECT_UNF; +import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_SUBJECT_FILEDESCRIPTION; import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_TYPE_TAG; import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_TYPE_UNF; +import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_TYPE_FILEDESCRIPTION; import edu.harvard.iq.dataverse.export.DDIExporter; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; @@ -1901,6 +1903,8 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) xmlw.writeEndElement(); // notes } + // If any tabular tags are present, each is formatted in a + // dedicated note: if (fileJson.containsKey("tabularTags")) { JsonArray tags = fileJson.getJsonArray("tabularTags"); for (int j = 0; j < tags.size(); j++) { @@ -1912,6 +1916,17 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) xmlw.writeEndElement(); // notes } } + + // Adding a dedicated node for the description entry (for + // non-tabular files we format it under the field) + if (fileJson.containsKey("description")) { + xmlw.writeStartElement("notes"); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_FILEDESCRIPTION); + xmlw.writeAttribute("subject", NOTE_SUBJECT_FILEDESCRIPTION); + 
xmlw.writeCharacters(fileJson.getString("description")); + xmlw.writeEndElement(); // notes + } // TODO: add the remaining fileDscr elements! xmlw.writeEndElement(); // fileDscr diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java index 8c5dad237b1..27a2ab99376 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceShapefileHelper.java @@ -100,71 +100,48 @@ public IngestServiceShapefileHelper(File zippedShapefile, File rezipFolder){ //this.processFile(zippedShapefile, rezipFolder); } - - private FileInputStream getFileInputStream(File fileObject){ - if (fileObject==null){ - return null; - } - try { + + private FileInputStream getFileInputStream(File fileObject){ + if (fileObject==null){ + return null; + } + try { return new FileInputStream(fileObject); } catch (FileNotFoundException ex) { logger.severe("Failed to create FileInputStream from File: " + fileObject.getAbsolutePath()); return null; } - } - - private void closeFileInputStream(FileInputStream fis){ - if (fis==null){ - return; - } + } + + private void closeFileInputStream(FileInputStream fis){ + if (fis==null){ + return; + } try { - fis.close(); + fis.close(); } catch (IOException ex) { logger.info("Failed to close FileInputStream"); } - } - + } + public boolean processFile() { if ((!isValidFile(this.zippedShapefile))||(!isValidFolder(this.rezipFolder))){ return false; } - - // (1) Use the ShapefileHandler to the .zip for a shapefile - // - FileInputStream shpfileInputStream = this.getFileInputStream(zippedShapefile); - if (shpfileInputStream==null){ - return false; - } - - this.shpHandler = new ShapefileHandler(shpfileInputStream); - if (!shpHandler.containsShapefile()){ - logger.severe("Shapefile was incorrectly detected upon Ingest (FileUtil) and passed here"); - return false; - } - 
- this.closeFileInputStream(shpfileInputStream); - - // (2) Rezip the shapefile pieces - logger.info("rezipFolder: " + rezipFolder.getAbsolutePath()); - shpfileInputStream = this.getFileInputStream(zippedShapefile); - if (shpfileInputStream==null){ - return false; - } - - boolean rezipSuccess; try { - rezipSuccess = shpHandler.rezipShapefileSets(shpfileInputStream, rezipFolder); + this.shpHandler = new ShapefileHandler(zippedShapefile); + if (!shpHandler.containsShapefile()){ + logger.severe("Shapefile was incorrectly detected upon Ingest (FileUtil) and passed here"); + return false; + } + logger.info("rezipFolder: " + rezipFolder.getAbsolutePath()); + return shpHandler.rezipShapefileSets(rezipFolder); } catch (IOException ex) { logger.severe("Shapefile was not correctly unpacked/repacked"); logger.severe("shpHandler message: " + shpHandler.errorMessage); return false; } - - this.closeFileInputStream(shpfileInputStream); - - return rezipSuccess; - // return createDataFiles(rezipFolder); } diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java index 50c24274bb2..fa56432cc3c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/DatasetExternalCitationsServiceBean.java @@ -7,6 +7,9 @@ import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; + import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -40,7 +43,8 @@ public class DatasetExternalCitationsServiceBean implements java.io.Serializable Arrays.asList( "cites", "references", - "supplements")); + "supplements", + "is-supplement-to")); static ArrayList outboundRelationships = new ArrayList( Arrays.asList( 
"is-cited-by", @@ -59,12 +63,11 @@ public List parseCitations(JsonArray citations) { if (inboundRelationships.contains(relationship)) { Dataset localDs = null; if (objectUri.contains("doi")) { - String globalId = objectUri.replace("https://", "").replace("doi.org/", "doi:").toUpperCase().replace("DOI:", "doi:"); - localDs = datasetService.findByGlobalId(globalId); + localDs = datasetService.findByGlobalId(objectUri); exCit.setDataset(localDs); } exCit.setCitedByUrl(subjectUri); - + if (localDs != null && !exCit.getCitedByUrl().isEmpty()) { datasetExternalCitations.add(exCit); } @@ -72,9 +75,9 @@ public List parseCitations(JsonArray citations) { if (outboundRelationships.contains(relationship)) { Dataset localDs = null; if (subjectUri.contains("doi")) { - String globalId = subjectUri.replace("https://", "").replace("doi.org/", "doi:").toUpperCase().replace("DOI:", "doi:"); - localDs = datasetService.findByGlobalId(globalId); + localDs = datasetService.findByGlobalId(subjectUri); exCit.setDataset(localDs); + } exCit.setCitedByUrl(objectUri); diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index a74474efa15..5bdbeac031d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -168,25 +168,12 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { } } - // Note that this SQL line in the code below: - // datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) - // behaves somewhat counter-intuitively if the versionnumber and/or - // minorversionnumber is/are NULL - it results in an empty string - // (NOT the string "{dataset_id}:", in other words). Some harvested - // versions do not have version numbers (only the ones harvested from - // other Dataverses!) 
It works fine - // for our purposes below, because we are simply counting the selected - // lines - i.e. we don't care if some of these lines are empty. - // But do not use this notation if you need the values returned to - // meaningfully identify the datasets! - - Query query = em.createNativeQuery( "select count(*)\n" + "from (\n" - + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + + "select DISTINCT ON (datasetversion.dataset_id) datasetversion.dataset_id \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") @@ -194,7 +181,7 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n ") + "and \n" + dataLocationLine // be careful about adding more and statements after this line. - + "group by dataset_id \n" + + " order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc\n" +") sub_temp" ); logger.log(Level.FINE, "Metric query: {0}", query); @@ -207,15 +194,15 @@ public List datasetsBySubjectToMonth(String yyyymm, String dataLocatio // A published local datasets may have more than one released version! 
// So that's why we have to jump through some extra hoops below // in order to select the latest one: - String originClause = "(datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in\n" + + String originClause = "(datasetversion.id in\n" + "(\n" + - "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" + + "select DISTINCT ON (datasetversion.dataset_id) datasetversion.id\n" + " from datasetversion\n" + " join dataset on dataset.id = datasetversion.dataset_id\n" + " where versionstate='RELEASED'\n" + " and dataset.harvestingclient_id is null\n" + " and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + - " group by dataset_id\n" + + " order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc\n" + "))\n"; if (!DATA_LOCATION_LOCAL.equals(dataLocation)) { // Default api state is DATA_LOCATION_LOCAL @@ -273,7 +260,7 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) { Query query = em.createNativeQuery( "select count(*)\n" + "from (\n" - + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max\n" + + "select DISTINCT ON (datasetversion.dataset_id) datasetversion.id\n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") @@ -281,7 +268,7 @@ public long datasetsPastDays(int days, String dataLocation, Dataverse d) { + ((d == null) ? "" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and \n" + dataLocationLine // be careful about adding more and statements after this line. 
- + "group by dataset_id \n" + + " order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc \n" +") sub_temp" ); logger.log(Level.FINE, "Metric query: {0}", query); @@ -322,9 +309,9 @@ public long filesToMonth(String yyyymm, Dataverse d) { + "select count(*)\n" + "from filemetadata\n" + "join datasetversion on datasetversion.id = filemetadata.datasetversion_id\n" - + "where datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in \n" + + "where datasetversion.id in \n" + "(\n" - + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + + "select DISTINCT ON (datasetversion.dataset_id) datasetversion.id \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") @@ -332,7 +319,7 @@ public long filesToMonth(String yyyymm, Dataverse d) { + ((d == null) ? 
"" : "and dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and date_trunc('month', releasetime) <= to_date('" + yyyymm + "','YYYY-MM')\n" + "and dataset.harvestingclient_id is null\n" - + "group by dataset_id \n" + + "order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc \n" /* both version columns sort desc so DISTINCT ON keeps each dataset's latest release */ + ");" ); logger.log(Level.FINE, "Metric query: {0}", query); @@ -345,9 +332,9 @@ public long filesPastDays(int days, Dataverse d) { + "select count(*)\n" + "from filemetadata\n" + "join datasetversion on datasetversion.id = filemetadata.datasetversion_id\n" - + "where datasetversion.dataset_id || ':' || datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber) in \n" + + "where datasetversion.id in \n" + "(\n" - + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber)) as max \n" + + "select DISTINCT ON (datasetversion.dataset_id) datasetversion.id \n" + "from datasetversion\n" + "join dataset on dataset.id = datasetversion.dataset_id\n" + ((d == null) ? "" : "join dvobject on dvobject.id = dataset.id\n") @@ -355,7 +342,7 @@ public long filesPastDays(int days, Dataverse d) { + "and releasetime > current_date - interval '" + days + "' day\n" + ((d == null) ? 
"" : "AND dvobject.owner_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataverse") + ")\n") + "and dataset.harvestingclient_id is null\n" - + "group by dataset_id \n" + + "order by datasetversion.dataset_id, datasetversion.versionnumber desc, datasetversion.minorversionnumber desc \n" + ");" ); logger.log(Level.FINE, "Metric query: {0}", query); diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java index f6d142aac96..250eae7e5fc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/AbstractPidProvider.java @@ -36,9 +36,9 @@ public abstract class AbstractPidProvider implements PidProvider { private String datafilePidFormat = null; - private HashSet managedSet; + protected HashSet managedSet = new HashSet(); - private HashSet excludedSet; + protected HashSet excludedSet = new HashSet(); private String id; private String label; @@ -47,8 +47,6 @@ protected AbstractPidProvider(String id, String label, String protocol) { this.id = id; this.label = label; this.protocol = protocol; - this.managedSet = new HashSet(); - this.excludedSet = new HashSet(); } protected AbstractPidProvider(String id, String label, String protocol, String authority, String shoulder, @@ -60,8 +58,12 @@ protected AbstractPidProvider(String id, String label, String protocol, String a this.shoulder = shoulder; this.identifierGenerationStyle = identifierGenerationStyle; this.datafilePidFormat = datafilePidFormat; - this.managedSet = new HashSet(Arrays.asList(managedList.split(",\\s"))); - this.excludedSet = new HashSet(Arrays.asList(excludedList.split(",\\s"))); + if(!managedList.isEmpty()) { + this.managedSet.addAll(Arrays.asList(managedList.split(",\\s"))); + } + if(!excludedList.isEmpty()) { + this.excludedSet.addAll(Arrays.asList(excludedList.split(",\\s"))); + } if 
(logger.isLoggable(Level.FINE)) { Iterator iter = managedSet.iterator(); while (iter.hasNext()) { @@ -313,10 +315,17 @@ protected GlobalId parsePersistentId(String protocol, String identifierString) { } public GlobalId parsePersistentId(String protocol, String authority, String identifier) { + return parsePersistentId(protocol, authority, identifier, false); + } + + public GlobalId parsePersistentId(String protocol, String authority, String identifier, boolean isCaseInsensitive) { logger.fine("Parsing: " + protocol + ":" + authority + getSeparator() + identifier + " in " + getId()); if (!PidProvider.isValidGlobalId(protocol, authority, identifier)) { return null; } + if(isCaseInsensitive) { + identifier = identifier.toUpperCase(); + } // Check authority/identifier if this is a provider that manages specific // identifiers // /is not one of the unmanaged providers that has null authority @@ -333,7 +342,7 @@ public GlobalId parsePersistentId(String protocol, String authority, String iden logger.fine("managed in " + getId() + ": " + getManagedSet().contains(cleanIdentifier)); logger.fine("excluded from " + getId() + ": " + getExcludedSet().contains(cleanIdentifier)); - if (!(((authority.equals(getAuthority()) && identifier.startsWith(getShoulder())) + if (!(((authority.equals(getAuthority()) && identifier.startsWith(isCaseInsensitive ? getShoulder().toUpperCase() : getShoulder())) /* upper-case the shoulder only when the identifier was upper-cased above; with isCaseInsensitive == false the identifier keeps its case and must match the shoulder as configured */ || getManagedSet().contains(cleanIdentifier)) && !getExcludedSet().contains(cleanIdentifier))) { return null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java index 02a7dedce47..70ce1ec4c14 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.pidproviders.doi; import java.util.Arrays; +import java.util.HashSet; import 
java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; @@ -26,9 +27,24 @@ public abstract class AbstractDOIProvider extends AbstractPidProvider { public AbstractDOIProvider(String id, String label, String providerAuthority, String providerShoulder, String identifierGenerationStyle, String datafilePidFormat, String managedList, String excludedList) { super(id, label, DOI_PROTOCOL, providerAuthority, providerShoulder, identifierGenerationStyle, datafilePidFormat, managedList, excludedList); + //Create case insensitive (converted toUpperCase) managedSet and excludedSet + managedSet = clean(managedSet, "managed"); + excludedSet = clean(excludedSet, "excluded"); } - //For Unmanged provider + private HashSet clean(HashSet originalSet, String setName) { + HashSet cleanSet = new HashSet(); + for(String entry: originalSet) { + if(entry.startsWith(DOI_PROTOCOL)) { + cleanSet.add(DOI_PROTOCOL + entry.substring(DOI_PROTOCOL.length()).toUpperCase()); + } else { + logger.warning("Non-DOI found in " + setName + " set of pidProvider id: " + getId() + ": " + entry + ". 
Entry is being dropped."); + } + } + return cleanSet; + } + + //For Unmanaged provider public AbstractDOIProvider(String name, String label) { super(name, label, DOI_PROTOCOL); } @@ -67,7 +83,7 @@ public GlobalId parsePersistentId(String protocol, String authority, String iden if (!DOI_PROTOCOL.equals(protocol)) { return null; } - return super.parsePersistentId(protocol, authority, identifier); + return super.parsePersistentId(protocol, authority, identifier, true); } public String getUrlPrefix() { diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a74a9f34bc9..8199b7d9c9f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1317,8 +1317,8 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject, boolean } if (StringUtils.isNotBlank(softwareName)) { if (StringUtils.isNotBlank(softwareVersion)) { + softwareName = softwareName + ", " + softwareVersion; } - softwareName = softwareName + ", " + softwareVersion; descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, softwareName); } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java index 5630844fb32..b07cd027a01 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DataCiteDOIProvider.java @@ -226,8 +226,7 @@ protected String getProviderKeyName() { @Override public String getProviderType() { - // TODO Auto-generated method stub - return null; + return TYPE; } 
public String getMdsUrl() { diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java index 9d61663d034..1f03d8a6cfb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/handle/HandlePidProvider.java @@ -59,6 +59,11 @@ * service. * As of now, it only does the registration updates, to accommodate * the modifyRegistration datasets API sub-command. + * + * Note that while Handles are nominally case sensitive, handle.net is + * configured to be case-insensitive and Dataverse makes case-insensitive + * database look-ups to find Handles (See #11003). That said, database + * entries are stored in the case matching the configuration of the provider. */ public class HandlePidProvider extends AbstractPidProvider { diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java index 7b55292350f..2cc0d41ede7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/perma/PermaLinkPidProvider.java @@ -24,6 +24,9 @@ * overridable by a configurable parameter to support use of an external * resolver. * + * Note that while PermaLinks are nominally case sensitive, Dataverse makes + * case-insensitive database look-ups to find them (See #11003). That said, database + * entries are stored in the case matching the configuration of the provider. 
*/ public class PermaLinkPidProvider extends AbstractPidProvider { diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java index 17dc6726a5a..fb676242fba 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java @@ -423,7 +423,7 @@ synchronized private static Dataset getNextToIndex(Long id, Dataset d) { public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { try { acquirePermitFromSemaphore(); - doAyncIndexDataset(dataset, doNormalSolrDocCleanUp); + doAsyncIndexDataset(dataset, doNormalSolrDocCleanUp); } catch (InterruptedException e) { String failureLogText = "Indexing failed: interrupted. You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); failureLogText += "\r\n" + e.getLocalizedMessage(); @@ -433,7 +433,7 @@ public void asyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { } } - private void doAyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { + private void doAsyncIndexDataset(Dataset dataset, boolean doNormalSolrDocCleanUp) { Long id = dataset.getId(); Dataset next = getNextToIndex(id, dataset); // if there is an ongoing index job for this dataset, next is null (ongoing index job will reindex the newest version after current indexing finishes) while (next != null) { @@ -454,7 +454,7 @@ public void asyncIndexDatasetList(List datasets, boolean doNormalSolrDo for(Dataset dataset : datasets) { try { acquirePermitFromSemaphore(); - doAyncIndexDataset(dataset, true); + doAsyncIndexDataset(dataset, true); } catch (InterruptedException e) { String failureLogText = "Indexing failed: interrupted. 
You can kickoff a re-index of this dataset with: \r\n curl http://localhost:8080/api/admin/index/datasets/" + dataset.getId().toString(); failureLogText += "\r\n" + e.getLocalizedMessage(); @@ -2465,6 +2465,11 @@ public void deleteHarvestedDocuments(Dataset harvestedDataset) { solrIdsOfDocumentsToDelete.add(solrDocIdentifierFile + datafile.getId()); } + deleteHarvestedDocuments(solrIdsOfDocumentsToDelete); + } + + public void deleteHarvestedDocuments(List solrIdsOfDocumentsToDelete) { + logger.fine("attempting to delete the following documents from the index: " + StringUtils.join(solrIdsOfDocumentsToDelete, ",")); IndexResponse resultOfAttemptToDeleteDocuments = solrIndexService.deleteMultipleSolrIds(solrIdsOfDocumentsToDelete); logger.fine("result of attempt to delete harvested documents: " + resultOfAttemptToDeleteDocuments + "\n"); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java index 4f3f6e46e48..9328dd03ca2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchIncludeFragment.java @@ -34,6 +34,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.MissingResourceException; import java.util.Optional; import java.util.Set; import java.util.logging.Logger; @@ -1231,40 +1232,33 @@ public String getTypeFromFilterQuery(String filterQuery) { } public List getFriendlyNamesFromFilterQuery(String filterQuery) { - - - if ((filterQuery == null)|| - (datasetfieldFriendlyNamesBySolrField == null)|| - (staticSolrFieldFriendlyNamesBySolrField==null)){ + + if ((filterQuery == null) || + (datasetfieldFriendlyNamesBySolrField == null) || + (staticSolrFieldFriendlyNamesBySolrField == null)) { return null; } - - if(!filterQuery.contains(":")) { + + if (!filterQuery.contains(":")) { return null; } - + int index = filterQuery.indexOf(":"); 
String key = filterQuery.substring(0,index); String value = filterQuery.substring(index+1); - List friendlyNames = new ArrayList<>(); + // friendlyNames gets 2 entries: key and value + List friendlyNames = new ArrayList<>(2); + // Get dataset field friendly name from default resource bundle file String datasetfieldFriendyName = datasetfieldFriendlyNamesBySolrField.get(key); if (datasetfieldFriendyName != null) { friendlyNames.add(datasetfieldFriendyName); } else { + // Get non-dataset field friendly name from "staticSearchFields" resource bundle file String nonDatasetSolrField = staticSolrFieldFriendlyNamesBySolrField.get(key); if (nonDatasetSolrField != null) { friendlyNames.add(nonDatasetSolrField); - } else if (key.equals(SearchFields.PUBLICATION_STATUS)) { - /** - * @todo Refactor this quick fix for - * https://github.com/IQSS/dataverse/issues/618 . We really need - * to get rid of all the reflection that's happening with - * solrQueryResponse.getStaticSolrFieldFriendlyNamesBySolrField() - * and - */ - friendlyNames.add("Publication Status"); } else { // meh. 
better than nuthin' friendlyNames.add(key); @@ -1276,9 +1270,13 @@ public List getFriendlyNamesFromFilterQuery(String filterQuery) { String valueWithoutQuotes = noTrailingQuote; if (key.equals(SearchFields.METADATA_TYPES) && getDataverse() != null && getDataverse().getMetadataBlockFacets() != null) { - Optional friendlyName = getDataverse().getMetadataBlockFacets().stream().filter(block -> block.getMetadataBlock().getName().equals(valueWithoutQuotes)).findFirst().map(block -> block.getMetadataBlock().getLocaleDisplayFacet()); + Optional friendlyName = getDataverse().getMetadataBlockFacets() + .stream() + .filter(block -> block.getMetadataBlock().getName().equals(valueWithoutQuotes)) + .findFirst() + .map(block -> block.getMetadataBlock().getLocaleDisplayFacet()); logger.fine(String.format("action=getFriendlyNamesFromFilterQuery key=%s value=%s friendlyName=%s", key, value, friendlyName)); - if(friendlyName.isPresent()) { + if (friendlyName.isPresent()) { friendlyNames.add(friendlyName.get()); return friendlyNames; } @@ -1290,7 +1288,15 @@ public List getFriendlyNamesFromFilterQuery(String filterQuery) { } } - friendlyNames.add(valueWithoutQuotes); + // Get value friendly name from default resource bundle file + String valueFriendlyName; + try { + valueFriendlyName = BundleUtil.getStringFromPropertyFile(noTrailingQuote, "Bundle"); + } catch (MissingResourceException e) { + valueFriendlyName = noTrailingQuote; + } + + friendlyNames.add(valueFriendlyName); return friendlyNames; } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 8b1959ef7d4..3aab6a8da3c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -35,6 +35,8 @@ import jakarta.inject.Inject; import jakarta.inject.Named; import jakarta.persistence.NoResultException; + +import 
org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery.SortClause; import org.apache.solr.client.solrj.SolrServerException; @@ -52,6 +54,8 @@ public class SearchServiceBean { private static final Logger logger = Logger.getLogger(SearchServiceBean.class.getCanonicalName()); + private static final String ALL_GROUPS = "*"; + /** * We're trying to make the SearchServiceBean lean, mean, and fast, with as * few injections of EJBs as possible. @@ -182,6 +186,7 @@ public SolrQueryResponse search( SolrQuery solrQuery = new SolrQuery(); query = SearchUtil.sanitizeQuery(query); + solrQuery.setQuery(query); if (sortField != null) { // is it ok not to specify any sort? - there are cases where we @@ -323,24 +328,13 @@ public SolrQueryResponse search( } } - //I'm not sure if just adding null here is good for hte permissions system... i think it needs something - if(dataverses != null) { - for(Dataverse dataverse : dataverses) { - // ----------------------------------- - // PERMISSION FILTER QUERY - // ----------------------------------- - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe, addFacets); - if (permissionFilterQuery != null) { - solrQuery.addFilterQuery(permissionFilterQuery); - } - } - } else { - String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, null, onlyDatatRelatedToMe, addFacets); - if (permissionFilterQuery != null) { - solrQuery.addFilterQuery(permissionFilterQuery); - } + // ----------------------------------- + // PERMISSION FILTER QUERY + // ----------------------------------- + String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, onlyDatatRelatedToMe, addFacets); + if (!StringUtils.isBlank(permissionFilterQuery)) { + solrQuery.addFilterQuery(permissionFilterQuery); } - /** * @todo: do sanity checking... 
throw error if negative @@ -994,7 +988,7 @@ public String getCapitalizedName(String name) { * * @return */ - private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe, boolean addFacets) { + private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, boolean onlyDatatRelatedToMe, boolean addFacets) { User user = dataverseRequest.getUser(); if (user == null) { @@ -1003,38 +997,22 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ if (solrQuery == null) { throw new NullPointerException("solrQuery cannot be null"); } - /** - * @todo For people who are not logged in, should we show stuff indexed - * with "AllUsers" group or not? If so, uncomment the allUsersString - * stuff below. - */ -// String allUsersString = IndexServiceBean.getGroupPrefix() + AllUsers.get().getAlias(); -// String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + allUsersString + ")"; - String publicOnly = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + ")"; -// String publicOnly = "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getPublicGroupString(); - // initialize to public only to be safe - String dangerZoneNoSolrJoin = null; - + if (user instanceof PrivateUrlUser) { user = GuestUser.get(); } - AuthenticatedUser au = null; + ArrayList groupList = new ArrayList(); + AuthenticatedUser au = null; Set groups; - - if (user instanceof GuestUser) { - // Yes, GuestUser may be part of one or more groups; such as IP Groups. 
- groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest)); - } else { - if (!(user instanceof AuthenticatedUser)) { - logger.severe("Should never reach here. A User must be an AuthenticatedUser or a Guest"); - throw new IllegalStateException("A User must be an AuthenticatedUser or a Guest"); - } + boolean avoidJoin = FeatureFlags.AVOID_EXPENSIVE_SOLR_JOIN.enabled(); + + if (user instanceof AuthenticatedUser) { au = (AuthenticatedUser) user; - + // ---------------------------------------------------- - // (3) Is this a Super User? + // Is this a Super User? // If so, they can see everything // ---------------------------------------------------- if (au.isSuperuser()) { @@ -1042,187 +1020,76 @@ private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQ // to see everything in Solr with no regard to permissions. But it's // been this way since Dataverse 4.0. So relax. :) - return dangerZoneNoSolrJoin; + return buildPermissionFilterQuery(avoidJoin, ALL_GROUPS); } - + // ---------------------------------------------------- - // (4) User is logged in AND onlyDatatRelatedToMe == true + // User is logged in AND onlyDatatRelatedToMe == true // Yes, give back everything -> the settings will be in - // the filterqueries given to search + // the filterqueries given to search // ---------------------------------------------------- if (onlyDatatRelatedToMe == true) { if (systemConfig.myDataDoesNotUsePermissionDocs()) { logger.fine("old 4.2 behavior: MyData is not using Solr permission docs"); - return dangerZoneNoSolrJoin; + return buildPermissionFilterQuery(avoidJoin, ALL_GROUPS); } else { // fall-through logger.fine("new post-4.2 behavior: MyData is using Solr permission docs"); } } - // ---------------------------------------------------- - // (5) Work with Authenticated User who is not a Superuser + // Work with Authenticated User who is not a Superuser // ---------------------------------------------------- - - groups = 
groupService.collectAncestors(groupService.groupsFor(dataverseRequest)); + groupList.add(IndexServiceBean.getGroupPerUserPrefix() + au.getId()); } - if (FeatureFlags.AVOID_EXPENSIVE_SOLR_JOIN.enabled()) { - /** - * Instead of doing a super expensive join, we will rely on the - * new boolean field PublicObject:true for public objects. This field - * is indexed on the content document itself, rather than a permission - * document. An additional join will be added only for any extra, - * more restricted groups that the user may be part of. - * **Note the experimental nature of this optimization**. - */ - StringBuilder sb = new StringBuilder(); - StringBuilder sbgroups = new StringBuilder(); - - // All users, guests and authenticated, should see all the - // documents marked as publicObject_b:true, at least: - sb.append(SearchFields.PUBLIC_OBJECT + ":" + true); + // In addition to the user referenced directly, we will also + // add joins on all the non-public groups that may exist for the + // user: - // One or more groups *may* also be available for this user. Once again, - // do note that Guest users may be part of some groups, such as - // IP groups. - - int groupCounter = 0; + // Authenticated users, *and the GuestUser*, may be part of one or more groups; such + // as IP Groups. 
+ groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest)); - // An AuthenticatedUser should also be able to see all the content - // on which they have direct permissions: - if (au != null) { - groupCounter++; - sbgroups.append(IndexServiceBean.getGroupPerUserPrefix() + au.getId()); - } - - // In addition to the user referenced directly, we will also - // add joins on all the non-public groups that may exist for the - // user: - for (Group group : groups) { - String groupAlias = group.getAlias(); - if (groupAlias != null && !groupAlias.isEmpty() && !groupAlias.startsWith("builtIn")) { - groupCounter++; - if (groupCounter > 1) { - sbgroups.append(" OR "); - } - sbgroups.append(IndexServiceBean.getGroupPrefix() + groupAlias); - } - } - - if (groupCounter > 1) { - // If there is more than one group, the parentheses must be added: - sbgroups.insert(0, "("); - sbgroups.append(")"); - } - - if (groupCounter > 0) { - // If there are any groups for this user, an extra join must be - // added to the query, and the extra sub-query must be added to - // the combined Solr query: - sb.append(" OR {!join from=" + SearchFields.DEFINITION_POINT + " to=id v=$q1}"); - // Add the subquery to the combined Solr query: - solrQuery.setParam("q1", SearchFields.DISCOVERABLE_BY + ":" + sbgroups.toString()); - logger.info("The sub-query q1 set to " + SearchFields.DISCOVERABLE_BY + ":" + sbgroups.toString()); - } - - String ret = sb.toString(); - logger.fine("Returning experimental query: " + ret); - return ret; - } - - // END OF EXPERIMENTAL OPTIMIZATION - - // Old, un-optimized way of handling permissions. - // Largely left intact, minus the lookups that have already been performed - // above. - - // ---------------------------------------------------- - // (1) Is this a GuestUser? 
- // ---------------------------------------------------- - if (user instanceof GuestUser) { - - StringBuilder sb = new StringBuilder(); - - String groupsFromProviders = ""; - for (Group group : groups) { - logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias()); - String groupAlias = group.getAlias(); - if (groupAlias != null && !groupAlias.isEmpty()) { - sb.append(" OR "); - // i.e. group_builtIn/all-users, ip/ipGroup3 - sb.append(IndexServiceBean.getGroupPrefix()).append(groupAlias); - } - } - groupsFromProviders = sb.toString(); - logger.fine("groupsFromProviders:" + groupsFromProviders); - String guestWithGroups = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + groupsFromProviders + ")"; - logger.fine(guestWithGroups); - return guestWithGroups; - } - - // ---------------------------------------------------- - // (5) Work with Authenticated User who is not a Superuser - // ---------------------------------------------------- - // It was already confirmed, that if the user is not GuestUser, we - // have an AuthenticatedUser au which is not null. - /** - * @todo all this code needs cleanup and clarification. - */ - /** - * Every AuthenticatedUser is part of a "User Private Group" (UGP), a - * concept we borrow from RHEL: - * https://access.redhat.com/site/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Deployment_Guide/ch-Managing_Users_and_Groups.html#s2-users-groups-private-groups - */ - /** - * @todo rename this from publicPlusUserPrivateGroup. Confusing - */ - // safe default: public only - String publicPlusUserPrivateGroup = publicOnly; -// + (onlyDatatRelatedToMe ? 
"" : (publicOnly + " OR ")) -// + "{!join from=" + SearchFields.GROUPS + " to=" + SearchFields.PERMS + "}id:" + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + ")"; - -// /** -// * @todo add onlyDatatRelatedToMe option into the experimental JOIN -// * before enabling it. -// */ - /** - * From a search perspective, we don't care about if the group was - * created within one dataverse or another. We just want a list of *all* - * the groups the user is part of. We are greedy. We want all BuiltIn - * Groups, Shibboleth Groups, IP Groups, "system" groups, everything. - * - * A JOIN on "permission documents" will determine if the user can find - * a given "content document" (dataset version, etc) in Solr. - */ - String groupsFromProviders = ""; - StringBuilder sb = new StringBuilder(); for (Group group : groups) { - logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias()); String groupAlias = group.getAlias(); - if (groupAlias != null && !groupAlias.isEmpty()) { - sb.append(" OR "); - // i.e. 
group_builtIn/all-users, group_builtIn/authenticated-users, group_1-explictGroup1, group_shib/2 - sb.append(IndexServiceBean.getGroupPrefix() + groupAlias); + if (groupAlias != null && !groupAlias.isEmpty() && (!avoidJoin || !groupAlias.startsWith("builtIn"))) { + groupList.add(IndexServiceBean.getGroupPrefix() + groupAlias); } } - groupsFromProviders = sb.toString(); - logger.fine(groupsFromProviders); - if (true) { - /** - * @todo get rid of "experimental" in name - */ - String experimentalJoin = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + IndexServiceBean.getPublicGroupString() + " OR " + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + groupsFromProviders + ")"; - publicPlusUserPrivateGroup = experimentalJoin; + if (!avoidJoin) { + // Add the public group + groupList.add(0, IndexServiceBean.getPublicGroupString()); + } + + String groupString = null; + //If we have additional groups, format them correctly into a search string, with parens if there is more than one + if (groupList.size() > 1) { + groupString = "(" + StringUtils.join(groupList, " OR ") + ")"; + } else if (groupList.size() == 1) { + groupString = groupList.get(0); } - - //permissionFilterQuery = publicPlusUserPrivateGroup; - logger.fine(publicPlusUserPrivateGroup); - - return publicPlusUserPrivateGroup; - + logger.fine("Groups: " + groupString); + String permissionQuery = buildPermissionFilterQuery(avoidJoin, groupString); + logger.fine("Permission Query: " + permissionQuery); + return permissionQuery; } + private String buildPermissionFilterQuery(boolean avoidJoin, String permissionFilterGroups) { + String query = (avoidJoin&& !isAllGroups(permissionFilterGroups)) ? 
SearchFields.PUBLIC_OBJECT + ":" + true : ""; + if (permissionFilterGroups != null && !isAllGroups(permissionFilterGroups)) { + if (!query.isEmpty()) { + query = "(" + query + " OR " + "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":" + permissionFilterGroups + ")"; + } else { + query = "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":" + permissionFilterGroups; + } + } + return query; + } + + private boolean isAllGroups(String groups) { + return (groups!=null &&groups.equals(ALL_GROUPS)); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java index cfe29ea08c7..e4d885276d0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrIndexServiceBean.java @@ -34,7 +34,7 @@ public class SolrIndexServiceBean { private static final Logger logger = Logger.getLogger(SolrIndexServiceBean.class.getCanonicalName()); - + @EJB DvObjectServiceBean dvObjectService; @EJB @@ -149,7 +149,7 @@ private List constructDatasetSolrDocs(Dataset dataset) { return solrDocs; } -// private List constructDatafileSolrDocs(DataFile dataFile) { + // private List constructDatafileSolrDocs(DataFile dataFile) { private List constructDatafileSolrDocs(DataFile dataFile, Map> permStringByDatasetVersion) { List datafileSolrDocs = new ArrayList<>(); Map desiredCards = searchPermissionsService.getDesiredCards(dataFile.getOwner()); @@ -166,14 +166,14 @@ private List constructDatafileSolrDocs(DataFile dataFile, Map constructDatafileSolrDocsFromDataset(Dataset datas } else { perms = searchPermissionsService.findDatasetVersionPerms(datasetVersionFileIsAttachedTo); } + for (FileMetadata fileMetadata : datasetVersionFileIsAttachedTo.getFileMetadatas()) { Long fileId = fileMetadata.getDataFile().getId(); String solrIdStart = 
IndexServiceBean.solrDocIdentifierFile + fileId; String solrIdEnd = getDatasetOrDataFileSolrEnding(datasetVersionFileIsAttachedTo.getVersionState()); String solrId = solrIdStart + solrIdEnd; DvObjectSolrDoc dataFileSolrDoc = new DvObjectSolrDoc(fileId.toString(), solrId, datasetVersionFileIsAttachedTo.getId(), fileMetadata.getLabel(), perms); - logger.fine("adding fileid " + fileId); + logger.finest("adding fileid " + fileId); datafileSolrDocs.add(dataFileSolrDoc); } } @@ -361,20 +362,19 @@ private void persistToSolr(Collection docs) throws SolrServer public IndexResponse indexPermissionsOnSelfAndChildren(long definitionPointId) { DvObject definitionPoint = dvObjectService.findDvObject(definitionPointId); - if ( definitionPoint == null ) { + if (definitionPoint == null) { logger.log(Level.WARNING, "Cannot find a DvOpbject with id of {0}", definitionPointId); return null; } else { return indexPermissionsOnSelfAndChildren(definitionPoint); } } - + /** * We use the database to determine direct children since there is no * inheritance */ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) { - List dvObjectsToReindexPermissionsFor = new ArrayList<>(); List filesToReindexAsBatch = new ArrayList<>(); /** * @todo Re-indexing the definition point itself seems to be necessary @@ -383,27 +383,47 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) // We don't create a Solr "primary/content" doc for the root dataverse // so don't create a Solr "permission" doc either. 
+ int i = 0; + int numObjects = 0; if (definitionPoint.isInstanceofDataverse()) { Dataverse selfDataverse = (Dataverse) definitionPoint; if (!selfDataverse.equals(dataverseService.findRootDataverse())) { - dvObjectsToReindexPermissionsFor.add(definitionPoint); + indexPermissionsForOneDvObject(definitionPoint); + numObjects++; } List directChildDatasetsOfDvDefPoint = datasetService.findByOwnerId(selfDataverse.getId()); for (Dataset dataset : directChildDatasetsOfDvDefPoint) { - dvObjectsToReindexPermissionsFor.add(dataset); + indexPermissionsForOneDvObject(dataset); + numObjects++; for (DataFile datafile : filesToReIndexPermissionsFor(dataset)) { filesToReindexAsBatch.add(datafile); + i++; + if (i % 100 == 0) { + reindexFilesInBatches(filesToReindexAsBatch); + filesToReindexAsBatch.clear(); + } + if (i % 1000 == 0) { + logger.fine("Progress: " +i + " files permissions reindexed"); + } } + logger.fine("Progress : dataset " + dataset.getId() + " permissions reindexed"); } } else if (definitionPoint.isInstanceofDataset()) { - dvObjectsToReindexPermissionsFor.add(definitionPoint); + indexPermissionsForOneDvObject(definitionPoint); + numObjects++; // index files Dataset dataset = (Dataset) definitionPoint; for (DataFile datafile : filesToReIndexPermissionsFor(dataset)) { filesToReindexAsBatch.add(datafile); + i++; + if (i % 100 == 0) { + reindexFilesInBatches(filesToReindexAsBatch); + filesToReindexAsBatch.clear(); + } } } else { - dvObjectsToReindexPermissionsFor.add(definitionPoint); + indexPermissionsForOneDvObject(definitionPoint); + numObjects++; } /** @@ -412,64 +432,64 @@ public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) * @todo Should update timestamps, probably, even thought these are * files, see https://github.com/IQSS/dataverse/issues/2421 */ - String response = reindexFilesInBatches(filesToReindexAsBatch); - - for (DvObject dvObject : dvObjectsToReindexPermissionsFor) { - /** - * @todo do something with this response - */ - 
IndexResponse indexResponse = indexPermissionsForOneDvObject(dvObject); - } - + reindexFilesInBatches(filesToReindexAsBatch); + logger.fine("Reindexed permissions for " + i + " files and " + numObjects + " datasets/collections"); return new IndexResponse("Number of dvObject permissions indexed for " + definitionPoint - + ": " + dvObjectsToReindexPermissionsFor.size() - ); + + ": " + numObjects); } private String reindexFilesInBatches(List filesToReindexPermissionsFor) { List docs = new ArrayList<>(); Map> byParentId = new HashMap<>(); Map> permStringByDatasetVersion = new HashMap<>(); - for (DataFile file : filesToReindexPermissionsFor) { - Dataset dataset = (Dataset) file.getOwner(); - Map desiredCards = searchPermissionsService.getDesiredCards(dataset); - for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataset)) { - boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState()); - if (cardShouldExist) { - List cachedPermission = permStringByDatasetVersion.get(datasetVersionFileIsAttachedTo.getId()); - if (cachedPermission == null) { - logger.fine("no cached permission! 
Looking it up..."); - List fileSolrDocs = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion); - for (DvObjectSolrDoc fileSolrDoc : fileSolrDocs) { - Long datasetVersionId = fileSolrDoc.getDatasetVersionId(); - if (datasetVersionId != null) { - permStringByDatasetVersion.put(datasetVersionId, fileSolrDoc.getPermissions()); + int i = 0; + try { + for (DataFile file : filesToReindexPermissionsFor) { + Dataset dataset = (Dataset) file.getOwner(); + Map desiredCards = searchPermissionsService.getDesiredCards(dataset); + for (DatasetVersion datasetVersionFileIsAttachedTo : datasetVersionsToBuildCardsFor(dataset)) { + boolean cardShouldExist = desiredCards.get(datasetVersionFileIsAttachedTo.getVersionState()); + if (cardShouldExist) { + List cachedPermission = permStringByDatasetVersion.get(datasetVersionFileIsAttachedTo.getId()); + if (cachedPermission == null) { + logger.finest("no cached permission! Looking it up..."); + List fileSolrDocs = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion); + for (DvObjectSolrDoc fileSolrDoc : fileSolrDocs) { + Long datasetVersionId = fileSolrDoc.getDatasetVersionId(); + if (datasetVersionId != null) { + permStringByDatasetVersion.put(datasetVersionId, fileSolrDoc.getPermissions()); + SolrInputDocument solrDoc = SearchUtil.createSolrDoc(fileSolrDoc); + docs.add(solrDoc); + i++; + } + } + } else { + logger.finest("cached permission is " + cachedPermission); + List fileSolrDocsBasedOnCachedPermissions = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion); + for (DvObjectSolrDoc fileSolrDoc : fileSolrDocsBasedOnCachedPermissions) { SolrInputDocument solrDoc = SearchUtil.createSolrDoc(fileSolrDoc); docs.add(solrDoc); + i++; } } - } else { - logger.fine("cached permission is " + cachedPermission); - List fileSolrDocsBasedOnCachedPermissions = constructDatafileSolrDocs((DataFile) file, permStringByDatasetVersion); - for (DvObjectSolrDoc fileSolrDoc : 
fileSolrDocsBasedOnCachedPermissions) { - SolrInputDocument solrDoc = SearchUtil.createSolrDoc(fileSolrDoc); - docs.add(solrDoc); + if (i % 20 == 0) { + persistToSolr(docs); + docs = new ArrayList<>(); } } } + Long parent = file.getOwner().getId(); + List existingList = byParentId.get(parent); + if (existingList == null) { + List empty = new ArrayList<>(); + byParentId.put(parent, empty); + } else { + List updatedList = existingList; + updatedList.add(file.getId()); + byParentId.put(parent, updatedList); + } } - Long parent = file.getOwner().getId(); - List existingList = byParentId.get(parent); - if (existingList == null) { - List empty = new ArrayList<>(); - byParentId.put(parent, empty); - } else { - List updatedList = existingList; - updatedList.add(file.getId()); - byParentId.put(parent, updatedList); - } - } - try { + persistToSolr(docs); return " " + filesToReindexPermissionsFor.size() + " files indexed across " + docs.size() + " Solr documents "; } catch (SolrServerException | IOException ex) { @@ -517,29 +537,26 @@ public JsonObjectBuilder deleteAllFromSolrAndResetIndexTimes() throws SolrServer } /** - * - * * @return A list of dvobject ids that should have their permissions - * re-indexed because Solr was down when a permission was added. The permission - * should be added to Solr. The id of the permission contains the type of - * DvObject and the primary key of the dvObject. - * DvObjects of type DataFile are currently skipped because their index - * time isn't stored in the database, since they are indexed along - * with their parent dataset (this may change). + * re-indexed because Solr was down when a permission was added. The + * permission should be added to Solr. The id of the permission contains the + * type of DvObject and the primary key of the dvObject. DvObjects of type + * DataFile are currently skipped because their index time isn't stored in + * the database, since they are indexed along with their parent dataset + * (this may change). 
*/ public List findPermissionsInDatabaseButStaleInOrMissingFromSolr() { List indexingRequired = new ArrayList<>(); long rootDvId = dataverseService.findRootDataverse().getId(); List missingDataversePermissionIds = dataverseService.findIdStalePermission(); List missingDatasetPermissionIds = datasetService.findIdStalePermission(); - for (Long id : missingDataversePermissionIds) { + for (Long id : missingDataversePermissionIds) { if (!id.equals(rootDvId)) { - indexingRequired.add(id); + indexingRequired.add(id); } } indexingRequired.addAll(missingDatasetPermissionIds); return indexingRequired; } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java index 33e828e619d..20632c170e4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/FeatureFlags.java @@ -97,12 +97,16 @@ public enum FeatureFlags { * for the dataset. * * @apiNote Raise flag by setting - * "dataverse.feature.enable-dataset-thumbnail-autoselect" + * "dataverse.feature.disable-dataset-thumbnail-autoselect" * @since Dataverse 6.4 */ DISABLE_DATASET_THUMBNAIL_AUTOSELECT("disable-dataset-thumbnail-autoselect"), /** * Feature flag for the new Globus upload framework. 
+ * + * @apiNote Raise flag by setting + * "dataverse.feature.globus-use-experimental-async-framework" + * @since Dataverse 6.4 */ GLOBUS_USE_EXPERIMENTAL_ASYNC_FRAMEWORK("globus-use-experimental-async-framework"), ; diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index a0c32d5c8ce..991682ec8e8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -525,15 +525,18 @@ public static String determineFileType(File f, String fileName) throws IOExcepti // Check for shapefile extensions as described here: http://en.wikipedia.org/wiki/Shapefile //logger.info("Checking for shapefile"); - ShapefileHandler shp_handler = new ShapefileHandler(new FileInputStream(f)); + ShapefileHandler shp_handler = new ShapefileHandler(f); if (shp_handler.containsShapefile()){ // logger.info("------- shapefile FOUND ----------"); fileType = ShapefileHandler.SHAPEFILE_FILE_TYPE; //"application/zipped-shapefile"; } - - Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); - if(bagItFileHandler.isPresent() && bagItFileHandler.get().isBagItPackage(fileName, f)) { - fileType = BagItFileHandler.FILE_TYPE; + try { + Optional bagItFileHandler = CDI.current().select(BagItFileHandlerFactory.class).get().getBagItFileHandler(); + if (bagItFileHandler.isPresent() && bagItFileHandler.get().isBagItPackage(fileName, f)) { + fileType = BagItFileHandler.FILE_TYPE; + } + } catch (Exception e) { + logger.warning("Error checking for BagIt package: " + e.getMessage()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java index f68957ad060..80e32184731 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtil.java @@ 
-123,7 +123,7 @@ public static JsonObject getPersonOrOrganization(String name, boolean organizati if (!name.replaceFirst(",", "").contains(",")) { // contributorName=, String[] fullName = name.split(", "); - givenName = fullName[1]; + givenName = fullName.length > 1 ? fullName[1] : null; familyName = fullName[0]; } } diff --git a/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java b/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java index f1440cc3c02..2b54f7a3bfe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/ShapefileHandler.java @@ -1,23 +1,21 @@ package edu.harvard.iq.dataverse.util; import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.FileNotFoundException; import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; import java.util.Date; import java.util.ArrayList; import java.util.List; -import java.util.zip.ZipEntry; -import java.util.zip.ZipInputStream; -import java.util.zip.ZipException; +import java.util.zip.ZipFile; import java.util.HashMap; import java.util.*; import java.nio.file.Files; import java.nio.file.Paths; import static java.nio.file.StandardCopyOption.REPLACE_EXISTING; -import java.util.logging.Level; + import java.util.logging.Logger; import org.apache.commons.io.FileUtils; @@ -43,11 +41,10 @@ * "shape1.pdf", "README.md", "shape_notes.txt" * * Code Example: - * FileInputStream shp_file_input_stream = new FileInputStream(new File("zipped_shapefile.zip")) - * ShapefileHandler shp_handler = new ShapefileHandler(shp_file_input_stream); + * ShapefileHandler shp_handler = new ShapefileHandler(new File("zipped_shapefile.zip")); * if (shp_handler.containsShapefile()){ * File rezip_folder = new File("~/folder_for_rezipping"); - * boolean rezip_success = shp_handler.rezipShapefileSets(shp_file_input_stream, rezip_folder ); + * boolean rezip_success = 
shp_handler.rezipShapefileSets(rezip_folder ); * if (!rezip_success){ * // rezip failed, should be an error message (String) available System.out.println(shp_handler.error_message); @@ -68,13 +65,13 @@ public class ShapefileHandler{ private static final Logger logger = Logger.getLogger(ShapefileHandler.class.getCanonicalName()); // Reference for these extensions: http://en.wikipedia.org/wiki/Shapefile - public final static String SHAPEFILE_FILE_TYPE = "application/zipped-shapefile"; - public final static String SHAPEFILE_FILE_TYPE_FRIENDLY_NAME = "Shapefile as ZIP Archive"; - public final static List SHAPEFILE_MANDATORY_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj"); - public final static String SHP_XML_EXTENSION = "shp.xml"; - public final static String BLANK_EXTENSION = "__PLACEHOLDER-FOR-BLANK-EXTENSION__"; - public final static List SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", "qpj", "qmd", SHP_XML_EXTENSION); - + public static final String SHAPEFILE_FILE_TYPE = "application/zipped-shapefile"; + public static final String SHAPEFILE_FILE_TYPE_FRIENDLY_NAME = "Shapefile as ZIP Archive"; + public static final List SHAPEFILE_MANDATORY_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj"); + public static final String SHP_XML_EXTENSION = "shp.xml"; + public static final String BLANK_EXTENSION = "__PLACEHOLDER-FOR-BLANK-EXTENSION__"; + public static final List SHAPEFILE_ALL_EXTENSIONS = Arrays.asList("shp", "shx", "dbf", "prj", "sbn", "sbx", "fbn", "fbx", "ain", "aih", "ixs", "mxs", "atx", "cpg", "qpj", "qmd", SHP_XML_EXTENSION); + private final File zipFile; public boolean DEBUG = false; private boolean zipFileProcessed = false; @@ -97,9 +94,6 @@ public class ShapefileHandler{ private Map> fileGroups = new HashMap<>(); private List finalRezippedFiles = new ArrayList<>(); - - private String outputFolder = "unzipped"; - private String rezippedFolder = "rezipped"; // 
Debug helper private void msg(String s){ @@ -116,40 +110,28 @@ private void msgt(String s){ } /* - Constructor, start with filename - */ - public ShapefileHandler(String filename){ - - if (filename==null){ - this.addErrorMessage("The filename was null"); - return; - } - - FileInputStream zip_file_stream; - try { - zip_file_stream = new FileInputStream(new File(filename)); - } catch (FileNotFoundException ex) { - this.addErrorMessage("The file was not found"); + Constructor, start with File + */ + public ShapefileHandler(File zip_file) throws IOException { + zipFile = zip_file; + if (zip_file == null) { + this.addErrorMessage("The file was null"); return; } - - this.examineZipfile(zip_file_stream); - } - - - /* - Constructor, start with FileInputStream - */ - public ShapefileHandler(FileInputStream zip_file_stream){ - - if (zip_file_stream==null){ - this.addErrorMessage("The zip_file_stream was null"); - return; + try (var zip_file_object = new ZipFile(zip_file)) { + this.examineZipfile(zip_file_object); + } + catch (FileNotFoundException ex) { + // While this constructor had a FileInputStream as argument: + // FileUtil.determineFileType threw this exception before calling the constructor with a FileInputStream + // IngestServiceShapefileHelper.processFile won´t call this constructor if the file is not valid hence does not exist. + // When the file would have disappeared in the meantime, it would have produced a slightly different error message. + logger.severe("File not found: " + zip_file.getAbsolutePath()); + throw ex; } - this.examineZipfile(zip_file_stream); } - + public List getFinalRezippedFiles(){ return this.finalRezippedFiles; } @@ -291,26 +273,19 @@ inside the uploaded zip file (issue #6873). To achieve this, we recreate subfolders in the FileMetadata of the newly created DataFiles. (-- L.A. 
09/2020) */ - private boolean unzipFilesToDirectory(FileInputStream zipfile_input_stream, File target_directory){ + private boolean unzipFilesToDirectory(ZipFile zipfileInput, File target_directory){ logger.fine("unzipFilesToDirectory: " + target_directory.getAbsolutePath() ); - if (zipfile_input_stream== null){ - this.addErrorMessage("unzipFilesToDirectory. The zipfile_input_stream is null."); - return false; - } if (!target_directory.isDirectory()){ this.addErrorMessage("This directory does not exist: " + target_directory.getAbsolutePath()); return false; } - List unzippedFileNames = new ArrayList<>(); - - ZipInputStream zipStream = new ZipInputStream(zipfile_input_stream); + List unzippedFileNames = new ArrayList<>(); + - ZipEntry origEntry; - byte[] buffer = new byte[2048]; try { - while((origEntry = zipStream.getNextEntry())!=null){ + for(var origEntry : Collections.list(zipfileInput.entries())){ String zentryFileName = origEntry.getName(); logger.fine("\nOriginal entry name: " + origEntry); @@ -360,15 +335,10 @@ private boolean unzipFilesToDirectory(FileInputStream zipfile_input_stream, File unzippedFileNames.add(outpath); } logger.fine("Write zip file: " + outpath); - FileOutputStream fileOutputStream; - long fsize = 0; - fileOutputStream = new FileOutputStream(outpath); - int len;// = 0; - while ((len = zipStream.read(buffer)) > 0){ - fileOutputStream.write(buffer, 0, len); - fsize+=len; - } // end while - fileOutputStream.close(); + try(var inputStream = zipfileInput.getInputStream(origEntry)) { + Files.createDirectories(new File(outpath).getParentFile().toPath()); + Files.copy(inputStream, Path.of(outpath), StandardCopyOption.REPLACE_EXISTING); + } } // end outer while } catch (IOException ex) { for (StackTraceElement el : ex.getStackTrace()){ @@ -377,19 +347,13 @@ private boolean unzipFilesToDirectory(FileInputStream zipfile_input_stream, File this.addErrorMessage("Failed to open ZipInputStream entry" + ex.getMessage()); return false; } - - try { - 
zipStream.close(); - } catch (IOException ex) { - Logger.getLogger(ShapefileHandler.class.getName()).log(Level.SEVERE, null, ex); - } - return true; + return true; } /* Rezip the shapefile(s) into a given directory Assumes that the zipfile_input_stream has already been checked! */ - public boolean rezipShapefileSets(FileInputStream zipfile_input_stream, File rezippedFolder) throws IOException{ + public boolean rezipShapefileSets(File rezippedFolder) throws IOException{ logger.fine("rezipShapefileSets"); //msgt("rezipShapefileSets"); if (!this.zipFileProcessed){ @@ -400,10 +364,6 @@ public boolean rezipShapefileSets(FileInputStream zipfile_input_stream, File rez this.addErrorMessage("There are no shapefiles here!"); return false; } - if (zipfile_input_stream== null){ - this.addErrorMessage("The zipfile_input_stream is null."); - return false; - } if (rezippedFolder == null){ this.addErrorMessage("The rezippedFolder is null."); return false; @@ -433,9 +393,11 @@ public boolean rezipShapefileSets(FileInputStream zipfile_input_stream, File rez // Unzip files! - if (!this.unzipFilesToDirectory(zipfile_input_stream, dir_for_unzipping)){ - this.addErrorMessage("Failed to unzip files."); - return false; + try(var zipfileObject = new ZipFile(zipFile)) { + if (!this.unzipFilesToDirectory(zipfileObject, dir_for_unzipping)) { + this.addErrorMessage("Failed to unzip files."); + return false; + } } // Redistribute files! String target_dirname = rezippedFolder.getAbsolutePath(); @@ -681,27 +643,19 @@ private boolean isFileToSkip(String fname){ /************************************** * Iterate through the zip file contents. * Does it contain any shapefiles? 
- * - * @param FileInputStream zip_file_stream */ - private boolean examineZipfile(FileInputStream zip_file_stream){ + private boolean examineZipfile(ZipFile zip_file){ // msgt("examineZipfile"); - - if (zip_file_stream==null){ - this.addErrorMessage("The zip file stream was null"); - return false; - } - + // Clear out file lists this.filesListInDir.clear(); this.filesizeHash.clear(); this.fileGroups.clear(); - try{ - ZipInputStream zipStream = new ZipInputStream(zip_file_stream); - ZipEntry entry; - List hiddenDirectories = new ArrayList<>(); - while((entry = zipStream.getNextEntry())!=null){ + try{ + List hiddenDirectories = new ArrayList<>(); + for(var entry : Collections.list(zip_file.entries())){ + String zentryFileName = entry.getName(); boolean isDirectory = entry.isDirectory(); @@ -748,8 +702,6 @@ private boolean examineZipfile(FileInputStream zip_file_stream){ this.filesizeHash.put(unzipFilePath, entry.getSize()); } } // end while - - zipStream.close(); if (this.filesListInDir.isEmpty()){ errorMessage = "No files in zipStream"; @@ -759,13 +711,8 @@ private boolean examineZipfile(FileInputStream zip_file_stream){ this.zipFileProcessed = true; return true; - }catch(ZipException ex){ - this.addErrorMessage("ZipException"); - msgt("ZipException"); - return false; - }catch(IOException ex){ - //ex.printStackTrace(); + //ex.printStackTrace(); this.addErrorMessage("IOException File name"); msgt("IOException"); return false; @@ -773,9 +720,6 @@ private boolean examineZipfile(FileInputStream zip_file_stream){ this.addErrorMessage("IllegalArgumentException when parsing zipfile"); msgt("IllegalArgumentException when parsing zipfile"); return false; - - }finally{ - } } // end examineFile diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java index 60967b13131..434b3bd8f8f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java +++ 
b/src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java @@ -87,8 +87,8 @@ public class SystemConfig { private static final long DEFAULT_THUMBNAIL_SIZE_LIMIT_IMAGE = 3000000L; // 3 MB private static final long DEFAULT_THUMBNAIL_SIZE_LIMIT_PDF = 1000000L; // 1 MB - public final static String DEFAULTCURATIONLABELSET = "DEFAULT"; - public final static String CURATIONLABELSDISABLED = "DISABLED"; + public static final String DEFAULTCURATIONLABELSET = "DEFAULT"; + public static final String CURATIONLABELSDISABLED = "DISABLED"; public String getVersion() { return getVersion(false); @@ -473,7 +473,7 @@ public Integer getSearchHighlightFragmentSize() { String fragSize = settingsService.getValueForKey(SettingsServiceBean.Key.SearchHighlightFragmentSize); if (fragSize != null) { try { - return new Integer(fragSize); + return Integer.valueOf(fragSize); } catch (NumberFormatException nfe) { logger.info("Could not convert " + SettingsServiceBean.Key.SearchHighlightFragmentSize + " to int: " + nfe); } @@ -490,7 +490,7 @@ public long getTabularIngestSizeLimit() { if (limitEntry != null) { try { - Long sizeOption = new Long(limitEntry); + Long sizeOption = Long.valueOf(limitEntry); return sizeOption; } catch (NumberFormatException nfe) { logger.warning("Invalid value for TabularIngestSizeLimit option? - " + limitEntry); @@ -515,7 +515,7 @@ public long getTabularIngestSizeLimit(String formatName) { if (limitEntry != null) { try { - Long sizeOption = new Long(limitEntry); + Long sizeOption = Long.valueOf(limitEntry); return sizeOption; } catch (NumberFormatException nfe) { logger.warning("Invalid value for TabularIngestSizeLimit:" + formatName + "? - " + limitEntry ); @@ -1061,7 +1061,7 @@ public long getDatasetValidationSizeLimit() { if (limitEntry != null) { try { - Long sizeOption = new Long(limitEntry); + Long sizeOption = Long.valueOf(limitEntry); return sizeOption; } catch (NumberFormatException nfe) { logger.warning("Invalid value for DatasetValidationSizeLimit option? 
- " + limitEntry); @@ -1076,7 +1076,7 @@ public long getFileValidationSizeLimit() { if (limitEntry != null) { try { - Long sizeOption = new Long(limitEntry); + Long sizeOption = Long.valueOf(limitEntry); return sizeOption; } catch (NumberFormatException nfe) { logger.warning("Invalid value for FileValidationSizeLimit option? - " + limitEntry); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 2f01c9bc2f2..8552389525d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -19,6 +19,7 @@ import edu.harvard.iq.dataverse.MetadataBlockServiceBean; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.api.Util; +import edu.harvard.iq.dataverse.api.dto.DataverseDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.IpGroup; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; @@ -48,8 +49,10 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.function.Consumer; import java.util.logging.Logger; import java.util.stream.Collectors; + import jakarta.json.Json; import jakarta.json.JsonArray; import jakarta.json.JsonObject; @@ -128,7 +131,7 @@ public Dataverse parseDataverse(JsonObject jobj) throws JsonParseException { dv.setPermissionRoot(jobj.getBoolean("permissionRoot", false)); dv.setFacetRoot(jobj.getBoolean("facetRoot", false)); dv.setAffiliation(jobj.getString("affiliation", null)); - + if (jobj.containsKey("dataverseContacts")) { JsonArray dvContacts = jobj.getJsonArray("dataverseContacts"); int i = 0; @@ -141,7 +144,7 @@ public Dataverse parseDataverse(JsonObject jobj) throws JsonParseException { } dv.setDataverseContacts(dvContactList); } - + if (jobj.containsKey("theme")) { DataverseTheme theme = 
parseDataverseTheme(jobj.getJsonObject("theme")); dv.setDataverseTheme(theme); @@ -149,21 +152,21 @@ public Dataverse parseDataverse(JsonObject jobj) throws JsonParseException { } dv.setDataverseType(Dataverse.DataverseType.UNCATEGORIZED); // default - if (jobj.containsKey("dataverseType")) { - for (Dataverse.DataverseType dvtype : Dataverse.DataverseType.values()) { - if (dvtype.name().equals(jobj.getString("dataverseType"))) { - dv.setDataverseType(dvtype); - } - } + String receivedDataverseType = jobj.getString("dataverseType", null); + if (receivedDataverseType != null) { + Arrays.stream(Dataverse.DataverseType.values()) + .filter(type -> type.name().equals(receivedDataverseType)) + .findFirst() + .ifPresent(dv::setDataverseType); } - + if (jobj.containsKey("filePIDsEnabled")) { dv.setFilePIDsEnabled(jobj.getBoolean("filePIDsEnabled")); } /* We decided that subject is not user set, but gotten from the subject of the dataverse's datasets - leavig this code in for now, in case we need to go back to it at some point - + if (jobj.containsKey("dataverseSubjects")) { List dvSubjectList = new LinkedList<>(); DatasetFieldType subjectType = datasetFieldSvc.findByName(DatasetFieldConstant.subject); @@ -186,10 +189,49 @@ public Dataverse parseDataverse(JsonObject jobj) throws JsonParseException { dv.setDataverseSubjects(dvSubjectList); } */ - + return dv; } - + + public DataverseDTO parseDataverseDTO(JsonObject jsonObject) throws JsonParseException { + DataverseDTO dataverseDTO = new DataverseDTO(); + + setDataverseDTOPropertyIfPresent(jsonObject, "alias", dataverseDTO::setAlias); + setDataverseDTOPropertyIfPresent(jsonObject, "name", dataverseDTO::setName); + setDataverseDTOPropertyIfPresent(jsonObject, "description", dataverseDTO::setDescription); + setDataverseDTOPropertyIfPresent(jsonObject, "affiliation", dataverseDTO::setAffiliation); + + String dataverseType = jsonObject.getString("dataverseType", null); + if (dataverseType != null) { + 
Arrays.stream(Dataverse.DataverseType.values()) + .filter(type -> type.name().equals(dataverseType)) + .findFirst() + .ifPresent(dataverseDTO::setDataverseType); + } + + if (jsonObject.containsKey("dataverseContacts")) { + JsonArray dvContacts = jsonObject.getJsonArray("dataverseContacts"); + List contacts = new ArrayList<>(); + for (int i = 0; i < dvContacts.size(); i++) { + JsonObject contactObj = dvContacts.getJsonObject(i); + DataverseContact contact = new DataverseContact(); + contact.setContactEmail(getMandatoryString(contactObj, "contactEmail")); + contact.setDisplayOrder(i); + contacts.add(contact); + } + dataverseDTO.setDataverseContacts(contacts); + } + + return dataverseDTO; + } + + private void setDataverseDTOPropertyIfPresent(JsonObject jsonObject, String key, Consumer setter) { + String value = jsonObject.getString(key, null); + if (value != null) { + setter.accept(value); + } + } + public DataverseTheme parseDataverseTheme(JsonObject obj) { DataverseTheme theme = new DataverseTheme(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 34c8fc5c6a6..f884d313d64 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -276,7 +276,9 @@ public static JsonObjectBuilder json(Dataverse dv, Boolean hideEmail, Boolean re } bld.add("permissionRoot", dv.isPermissionRoot()) .add("description", dv.getDescription()) - .add("dataverseType", dv.getDataverseType().name()); + .add("dataverseType", dv.getDataverseType().name()) + .add("isMetadataBlockRoot", dv.isMetadataBlockRoot()) + .add("isFacetRoot", dv.isFacetRoot()); if (dv.getOwner() != null) { bld.add("ownerId", dv.getOwner().getId()); } @@ -340,6 +342,7 @@ private static JsonObjectBuilder addEmbeddedOwnerObject(DvObject dvo, JsonObject ownerObject.add("type", "DATAVERSE"); Dataverse in = (Dataverse) dvo; 
ownerObject.add("identifier", in.getAlias()); + ownerObject.add("isReleased", in.isReleased()); } if (dvo.isInstanceofDataset()) { diff --git a/src/main/java/edu/harvard/iq/dataverse/validation/PasswordValidatorServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/validation/PasswordValidatorServiceBean.java index 41e7f1b8b22..bbe7d135e0f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/validation/PasswordValidatorServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/validation/PasswordValidatorServiceBean.java @@ -13,6 +13,7 @@ import java.util.Date; import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; import java.util.Properties; import java.util.logging.Level; import java.util.logging.Logger; @@ -83,8 +84,7 @@ private enum ValidatorTypes { GoodStrengthValidator, StandardValidator } - @SuppressWarnings("unchecked") - private final static LinkedHashMap validators = new LinkedHashMap(2); + private static final Map validators = new LinkedHashMap<>(2); private int goodStrength; private int maxLength; private int minLength; @@ -100,7 +100,7 @@ private enum ValidatorTypes { public PasswordValidatorServiceBean() { final Properties properties = PropertiesMessageResolver.getDefaultProperties(); properties.setProperty(GoodStrengthRule.ERROR_CODE_GOODSTRENGTH, GoodStrengthRule.ERROR_MESSAGE_GOODSTRENGTH); - messageResolver = new PropertiesMessageResolver(properties); + messageResolver = new PropertiesMessageResolver(properties); } public PasswordValidatorServiceBean(List characterRules) { diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 5f3e4c33e0b..012b389ce32 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -1464,7 +1464,7 @@ dataset.editBtn.itemLabel.deleteDataset=Delete Dataset dataset.editBtn.itemLabel.deleteDraft=Delete Draft Version dataset.editBtn.itemLabel.deaccession=Deaccession Dataset 
dataset.exportBtn=Export Metadata -dataset.exportBtn.itemLabel.ddi=DDI +dataset.exportBtn.itemLabel.ddi=DDI Codebook v2 dataset.exportBtn.itemLabel.dublinCore=Dublin Core dataset.exportBtn.itemLabel.schemaDotOrg=Schema.org JSON-LD dataset.exportBtn.itemLabel.datacite=DataCite @@ -1934,7 +1934,7 @@ file.downloadBtn.format.all=All File Formats + Information file.downloadBtn.format.tab=Tab-Delimited file.downloadBtn.format.original={0} (Original File Format) file.downloadBtn.format.rdata=RData -file.downloadBtn.format.var=Variable Metadata +file.downloadBtn.format.var=DDI Codebook v2 file.downloadBtn.format.citation=Data File Citation file.download.filetype.unknown=Original File Format file.more.information.link=Link to more file information for @@ -2065,7 +2065,7 @@ file.deleteFileDialog.multiple.immediate=The file(s) will be deleted after you c file.deleteFileDialog.header=Delete Files file.deleteFileDialog.failed.tip=Files will not be removed from previously published versions of the dataset. file.deaccessionDialog.tip.permanent=Deaccession is permanent. -file.deaccessionDialog.tip=This dataset will no longer be public and a tumbstone will display the reason for deaccessioning.
Please read the documentation if you have any questions. +file.deaccessionDialog.tip=This dataset will no longer be public and a tombstone will display the reason for deaccessioning.
Please read the documentation if you have any questions. file.deaccessionDialog.version=Version file.deaccessionDialog.reason.question1=Which version(s) do you want to deaccession? file.deaccessionDialog.reason.question2=What is the reason for deaccession? diff --git a/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties b/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties index 630539d912e..4507c22fdf8 100644 --- a/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties +++ b/src/main/java/propertyFiles/MimeTypeDetectionByFileExtension.properties @@ -15,6 +15,7 @@ m=text/x-matlab mat=application/matlab-mat md=text/markdown mp3=audio/mp3 +m4a=audio/mp4 nii=image/nii nc=application/netcdf ods=application/vnd.oasis.opendocument.spreadsheet diff --git a/src/main/resources/db/migration/V6.4.0.1.sql b/src/main/resources/db/migration/V6.4.0.1.sql new file mode 100644 index 00000000000..0bcd87dd736 --- /dev/null +++ b/src/main/resources/db/migration/V6.4.0.1.sql @@ -0,0 +1,4 @@ +-- Adding a case-insensitive index related to #11003 +-- + +CREATE UNIQUE INDEX IF NOT EXISTS INDEX_DVOBJECT_authority_protocol_upper_identifier ON dvobject (authority, protocol, UPPER(identifier)); \ No newline at end of file diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index 255e63fbfc2..03173faf989 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -12,7 +12,7 @@ or !empty termsOfUseAndAccess.originalArchive or !empty termsOfUseAndAccess.availabilityStatus or !empty termsOfUseAndAccess.contactForAccess or !empty termsOfUseAndAccess.sizeOfCollection or !empty termsOfUseAndAccess.studyCompletion - or termsOfUseAndAccess.fileAccessRequest}"/> + }"/>
  • - + +
  • - + -
  • diff --git a/src/test/java/edu/harvard/iq/dataverse/DatasetTest.java b/src/test/java/edu/harvard/iq/dataverse/DatasetTest.java index 2153a336303..687e0af5b81 100644 --- a/src/test/java/edu/harvard/iq/dataverse/DatasetTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/DatasetTest.java @@ -10,6 +10,7 @@ import static org.junit.jupiter.api.Assertions.*; import java.util.ArrayList; +import java.util.Date; import java.util.List; /** @@ -240,5 +241,41 @@ public void datasetShouldBeDeaccessionedWithDeaccessionedAndDeaccessionedVersion assertTrue(dataset.isDeaccessioned()); } - + + @Test + public void testGetMostRecentMajorVersionReleaseDateWithDeaccessionedVersions() { + List versionList = new ArrayList(); + + long ver = 5; + // 5.2 + DatasetVersion relVersion = new DatasetVersion(); + relVersion.setVersionState(VersionState.RELEASED); + relVersion.setMinorVersionNumber(2L); + relVersion.setVersionNumber(ver); + versionList.add(relVersion); + + // 5.1 + relVersion = new DatasetVersion(); + relVersion.setVersionState(VersionState.DEACCESSIONED); + relVersion.setMinorVersionNumber(1L); + relVersion.setVersionNumber(ver); + versionList.add(relVersion); + + // 5.0, 4.0, 3.0, 2.0, 1.0 + while (ver > 0) { + DatasetVersion deaccessionedVersion = new DatasetVersion(); + deaccessionedVersion.setVersionState(VersionState.DEACCESSIONED); + // only add an actual date to v5.0 so the assertNotNull will only pass if this version's date is returned + deaccessionedVersion.setReleaseTime((ver == 5) ? 
new Date() : null); + deaccessionedVersion.setMinorVersionNumber(0L); + deaccessionedVersion.setVersionNumber(ver--); + versionList.add(deaccessionedVersion); + } + + Dataset dataset = new Dataset(); + dataset.setVersions(versionList); + + Date releaseDate = dataset.getMostRecentMajorVersionReleaseDate(); + assertNotNull(releaseDate); + } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetTypesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetTypesIT.java index e8426b638d7..a0b9f5325d0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetTypesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetTypesIT.java @@ -94,7 +94,8 @@ public void testCreateSoftwareDatasetNative() { String dataset2Pid = JsonPath.from(createDataset.getBody().asString()).getString("data.persistentId"); UtilIT.publishDatasetViaNativeApi(dataset2Pid, "major", apiToken).then().assertThat().statusCode(OK.getStatusCode()); - + //An explicit sleep is needed here because the searchAndShowFacets won't sleep for the query used here + UtilIT.sleepForReindex(dataset2Pid, apiToken, 5); Response searchCollection = UtilIT.searchAndShowFacets("parentName:" + dataverseAlias, null); searchCollection.prettyPrint(); searchCollection.then().assertThat() diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index f52aa4fe9bd..93f1024ae7a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -667,6 +667,60 @@ public void testCreatePublishDestroyDataset() { deleteDatasetResponse.prettyPrint(); assertEquals(200, deleteDatasetResponse.getStatusCode()); + // Start of test of deleting a file from a deaccessioned version. + + // Create Dataset for deaccession test. 
+ Response deaccessionTestDataset = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + deaccessionTestDataset.prettyPrint(); + deaccessionTestDataset.then().assertThat().statusCode(CREATED.getStatusCode()); + Integer deaccessionTestDatasetId = UtilIT.getDatasetIdFromResponse(deaccessionTestDataset); + + // File upload for deaccession test. + String pathToFile = "src/main/webapp/resources/images/dataverseproject.png"; + Response uploadResponse = UtilIT.uploadFileViaNative(deaccessionTestDatasetId.toString(), pathToFile, apiToken); + uploadResponse.prettyPrint(); + uploadResponse.then().assertThat().statusCode(OK.getStatusCode()); + Integer deaccessionTestFileId = JsonPath.from(uploadResponse.body().asString()).getInt("data.files[0].dataFile.id"); + + // Publish Dataset for deaccession test. + Response deaccessionTestPublishResponse = UtilIT.publishDatasetViaNativeApi(deaccessionTestDatasetId, "major", apiToken); + deaccessionTestPublishResponse.prettyPrint(); + + // Deaccession Dataset for deaccession test. + Response deaccessionTestDatasetResponse = UtilIT.deaccessionDataset(deaccessionTestDatasetId, DS_VERSION_LATEST_PUBLISHED, "Test deaccession reason.", null, apiToken); + deaccessionTestDatasetResponse.prettyPrint(); + deaccessionTestDatasetResponse.then().assertThat().statusCode(OK.getStatusCode()); + + // Version check for deaccession test - Deaccessioned. + Response deaccessionTestVersions = UtilIT.getDatasetVersions(deaccessionTestDatasetId.toString(), apiToken); + deaccessionTestVersions.prettyPrint(); + deaccessionTestVersions.then().assertThat() + .body("data[0].latestVersionPublishingState", equalTo("DEACCESSIONED")) + .statusCode(OK.getStatusCode()); + + // File deletion / Draft creation due diligence check for deaccession test. 
+ Response deaccessionTestDeleteFile = UtilIT.deleteFileInDataset(deaccessionTestFileId, apiToken); + deaccessionTestDeleteFile.prettyPrint(); + deaccessionTestDeleteFile + .then().assertThat() + .statusCode(OK.getStatusCode()); + + // Version check for deaccession test - Draft. + deaccessionTestVersions = UtilIT.getDatasetVersions(deaccessionTestDatasetId.toString(), apiToken); + deaccessionTestVersions.prettyPrint(); + deaccessionTestVersions.then().assertThat() + .body("data[0].latestVersionPublishingState", equalTo("DRAFT")) + .statusCode(OK.getStatusCode()); + + // Deleting Dataset for deaccession test. + Response deaccessionTestDelete = UtilIT.destroyDataset(deaccessionTestDatasetId, apiToken); + deaccessionTestDelete.prettyPrint(); + deaccessionTestDelete.then() + .assertThat() + .statusCode(OK.getStatusCode()); + + // End of deaccession test. + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); deleteDataverseResponse.prettyPrint(); assertEquals(200, deleteDataverseResponse.getStatusCode()); @@ -2080,8 +2134,11 @@ public void testGetDatasetOwners() { Response getDatasetWithOwners = UtilIT.getDatasetWithOwners(persistentId, apiToken, true); getDatasetWithOwners.prettyPrint(); - getDatasetWithOwners.then().assertThat().body("data.isPartOf.identifier", equalTo(dataverseAlias)); - + getDatasetWithOwners.then().assertThat().body("data.isPartOf.identifier", equalTo(dataverseAlias)); + getDatasetWithOwners.then().assertThat().body("data.isPartOf.isReleased", equalTo(false)); + getDatasetWithOwners.then().assertThat().body("data.isPartOf.isPartOf.identifier", equalTo("root")); + getDatasetWithOwners.then().assertThat().body("data.isPartOf.isPartOf.isReleased", equalTo(true)); + Response destroyDatasetResponse = UtilIT.destroyDataset(datasetId, apiToken); assertEquals(200, destroyDatasetResponse.getStatusCode()); @@ -2963,6 +3020,34 @@ public void testLinkingDatasets() { linkDataset.then().assertThat() 
.statusCode(OK.getStatusCode()); + // Link another to test the list of linked datasets + Response createDataverse3 = UtilIT.createRandomDataverse(apiToken); + createDataverse3.prettyPrint(); + createDataverse3.then().assertThat() + .statusCode(CREATED.getStatusCode()); + String dataverse3Alias = UtilIT.getAliasFromResponse(createDataverse3); + Integer dataverse3Id = UtilIT.getDatasetIdFromResponse(createDataverse3); + linkDataset = UtilIT.linkDataset(datasetPid, dataverse3Alias, superuserApiToken); + linkDataset.prettyPrint(); + linkDataset.then().assertThat() + .statusCode(OK.getStatusCode()); + // get the list in Json format + Response linkDatasetsResponse = UtilIT.getDatasetLinks(datasetPid, superuserApiToken); + linkDatasetsResponse.prettyPrint(); + linkDatasetsResponse.then().assertThat() + .statusCode(OK.getStatusCode()); + JsonObject linkDatasets = Json.createReader(new StringReader(linkDatasetsResponse.asString())).readObject(); + JsonArray lst = linkDatasets.getJsonObject("data").getJsonArray("linked-dataverses"); + List ids = List.of(dataverse2Id, dataverse3Id); + List uniqueids = new ArrayList<>(); + assertEquals(ids.size(), lst.size()); + for (int i = 0; i < lst.size(); i++) { + int id = lst.getJsonObject(i).getInt("id"); + assertTrue(ids.contains(id)); + assertFalse(uniqueids.contains(id)); + uniqueids.add(id); + } + //Experimental code for trying to trick test into thinking the dataset has been harvested /* createDataset = UtilIT.createRandomDatasetViaNativeApi(dataverse1Alias, apiToken); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java index 6fbe91c8405..9567cf3910a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DataversesIT.java @@ -1,12 +1,15 @@ package edu.harvard.iq.dataverse.api; import io.restassured.RestAssured; + import static io.restassured.RestAssured.given; import static 
io.restassured.path.json.JsonPath.with; + import io.restassured.response.Response; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; + import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; @@ -14,6 +17,7 @@ import java.util.Arrays; import java.util.List; import java.util.logging.Logger; + import jakarta.json.Json; import jakarta.json.JsonObject; import jakarta.json.JsonObjectBuilder; @@ -31,6 +35,7 @@ import static org.junit.jupiter.api.Assertions.*; import java.nio.file.Files; + import io.restassured.path.json.JsonPath; import org.hamcrest.CoreMatchers; import org.hamcrest.Matchers; @@ -43,7 +48,7 @@ public class DataversesIT { public static void setUpClass() { RestAssured.baseURI = UtilIT.getRestAssuredBaseUri(); } - + @AfterAll public static void afterClass() { Response removeExcludeEmail = UtilIT.deleteSetting(SettingsServiceBean.Key.ExcludeEmailFromExport); @@ -130,14 +135,16 @@ public void testDataverseCategory() { public void testMinimalDataverse() throws FileNotFoundException { Response createUser = UtilIT.createRandomUser(); createUser.prettyPrint(); - String username = UtilIT.getUsernameFromResponse(createUser); String apiToken = UtilIT.getApiTokenFromResponse(createUser); JsonObject dvJson; FileReader reader = new FileReader("doc/sphinx-guides/source/_static/api/dataverse-minimal.json"); dvJson = Json.createReader(reader).readObject(); Response create = UtilIT.createDataverse(dvJson, apiToken); create.prettyPrint(); - create.then().assertThat().statusCode(CREATED.getStatusCode()); + create.then().assertThat() + .body("data.isMetadataBlockRoot", equalTo(false)) + .body("data.isFacetRoot", equalTo(false)) + .statusCode(CREATED.getStatusCode()); Response deleteDataverse = UtilIT.deleteDataverse("science", apiToken); deleteDataverse.prettyPrint(); deleteDataverse.then().assertThat().statusCode(OK.getStatusCode()); @@ 
-646,10 +653,182 @@ public void testImportDDI() throws IOException, InterruptedException { Response deleteUserResponse = UtilIT.deleteUser(username); assertEquals(200, deleteUserResponse.getStatusCode()); } - + @Test - public void testAttributesApi() throws Exception { + public void testImport() throws IOException, InterruptedException { + + Response createUser = UtilIT.createRandomUser(); + String username = UtilIT.getUsernameFromResponse(createUser); + Response makeSuperUser = UtilIT.makeSuperUser(username); + assertEquals(200, makeSuperUser.getStatusCode()); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken); + assertEquals(200, publishDataverse.getStatusCode()); + + JsonObjectBuilder datasetJson = Json.createObjectBuilder() + .add("datasetVersion", Json.createObjectBuilder() + .add("license", Json.createObjectBuilder() + .add("name", "CC0 1.0") + ) + .add("metadataBlocks", Json.createObjectBuilder() + .add("citation", Json.createObjectBuilder() + .add("fields", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("typeName", "title") + .add("value", "Test Dataset") + .add("typeClass", "primitive") + .add("multiple", false) + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("authorName", + Json.createObjectBuilder() + .add("value", "Simpson, Homer") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "authorName")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "author") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("datasetContactEmail", + Json.createObjectBuilder() + .add("value", 
"hsimpson@mailinator.com") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "datasetContactEmail")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "datasetContact") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add(Json.createObjectBuilder() + .add("dsDescriptionValue", + Json.createObjectBuilder() + .add("value", "This a test dataset.") + .add("typeClass", "primitive") + .add("multiple", false) + .add("typeName", "dsDescriptionValue")) + ) + ) + .add("typeClass", "compound") + .add("multiple", true) + .add("typeName", "dsDescription") + ) + .add(Json.createObjectBuilder() + .add("value", Json.createArrayBuilder() + .add("Other") + ) + .add("typeClass", "controlledVocabulary") + .add("multiple", true) + .add("typeName", "subject") + ) + ) + ) + )); + + String json = datasetJson.build().toString(); + + Response importJSONNoPid = UtilIT.importDatasetViaNativeApi(apiToken, dataverseAlias, json, null, "no"); + logger.info(importJSONNoPid.prettyPrint()); + assertEquals(400, importJSONNoPid.getStatusCode()); + + String body = importJSONNoPid.getBody().asString(); + String status = JsonPath.from(body).getString("status"); + assertEquals("ERROR", status); + + String message = JsonPath.from(body).getString("message"); + assertEquals( + "Please provide a persistent identifier, either by including it in the JSON, or by using the pid query parameter.", + message + ); + + Response importJSONNoPidRelease = UtilIT.importDatasetViaNativeApi(apiToken, dataverseAlias, json, null, "yes"); + logger.info( importJSONNoPidRelease.prettyPrint()); + assertEquals(400, importJSONNoPidRelease.getStatusCode()); + + body = importJSONNoPidRelease.getBody().asString(); + status = JsonPath.from(body).getString("status"); + assertEquals("ERROR", status); + + message = JsonPath.from(body).getString("message"); + assertEquals( + "Please provide a persistent identifier, either by including it in the 
JSON, or by using the pid query parameter.", + message + ); + + Response importJSONUnmanagedPid = UtilIT.importDatasetViaNativeApi(apiToken, dataverseAlias, json, "doi:10.5073/FK2/ABCD11", "no"); + logger.info(importJSONUnmanagedPid.prettyPrint()); + assertEquals(400, importJSONUnmanagedPid.getStatusCode()); + + body = importJSONUnmanagedPid.getBody().asString(); + status = JsonPath.from(body).getString("status"); + assertEquals("ERROR", status); + + message = JsonPath.from(body).getString("message"); + assertEquals( + "Cannot import a dataset that has a PID that doesn't match the server's settings", + message + ); + + // Under normal conditions, you shouldn't need to destroy these datasets. + // Uncomment if they're still around from a previous failed run. +// Response destroy1 = UtilIT.destroyDataset("doi:10.5072/FK2/ABCD11", apiToken); +// destroy1.prettyPrint(); +// Response destroy2 = UtilIT.destroyDataset("doi:10.5072/FK2/ABCD22", apiToken); +// destroy2.prettyPrint(); + + Response importJSONPid = UtilIT.importDatasetViaNativeApi(apiToken, dataverseAlias, json, "doi:10.5072/FK2/ABCD11", "no"); + logger.info(importJSONPid.prettyPrint()); + assertEquals(201, importJSONPid.getStatusCode()); + + Response importJSONPidRel = UtilIT.importDatasetViaNativeApi(apiToken, dataverseAlias, json, "doi:10.5072/FK2/ABCD22", "yes"); + logger.info(importJSONPidRel.prettyPrint()); + assertEquals(201, importJSONPidRel.getStatusCode()); + + Integer datasetIdInt = JsonPath.from(importJSONPid.body().asString()).getInt("data.id"); + + Response search1 = UtilIT.search("id:dataset_" + datasetIdInt + "_draft", apiToken); // sanity check, can find it + search1.prettyPrint(); + search1.then().assertThat() + .body("data.total_count", CoreMatchers.is(1)) + .body("data.count_in_response", CoreMatchers.is(1)) + .body("data.items[0].name", CoreMatchers.is("Test Dataset")) + .statusCode(OK.getStatusCode()); + + //cleanup + + Response destroyDatasetResponse = 
UtilIT.destroyDataset(datasetIdInt, apiToken); + assertEquals(200, destroyDatasetResponse.getStatusCode()); + + Integer datasetIdIntPidRel = JsonPath.from(importJSONPidRel.body().asString()).getInt("data.id"); + Response destroyDatasetResponsePidRel = UtilIT.destroyDataset(datasetIdIntPidRel, apiToken); + assertEquals(200, destroyDatasetResponsePidRel.getStatusCode()); + + UtilIT.sleepForDeadlock(UtilIT.MAXIMUM_IMPORT_DURATION); + + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); + assertEquals(200, deleteDataverseResponse.getStatusCode()); + + Response deleteUserResponse = UtilIT.deleteUser(username); + assertEquals(200, deleteUserResponse.getStatusCode()); + } + @Test + public void testAttributesApi() { Response createUser = UtilIT.createRandomUser(); String apiToken = UtilIT.getApiTokenFromResponse(createUser); @@ -664,30 +843,70 @@ public void testAttributesApi() throws Exception { String collectionAlias = UtilIT.getAliasFromResponse(createDataverseResponse); String newCollectionAlias = collectionAlias + "RENAMED"; - - // Change the alias of the collection: - - Response changeAttributeResp = UtilIT.setCollectionAttribute(collectionAlias, "alias", newCollectionAlias, apiToken); - changeAttributeResp.prettyPrint(); - + + // Change the name of the collection: + + String newCollectionName = "Renamed Name"; + Response changeAttributeResp = UtilIT.setCollectionAttribute(collectionAlias, "name", newCollectionName, apiToken); changeAttributeResp.then().assertThat() .statusCode(OK.getStatusCode()) .body("message.message", equalTo("Update successful")); - - // Check on the collection, under the new alias: - + + // Change the description of the collection: + + String newDescription = "Renamed Description"; + changeAttributeResp = UtilIT.setCollectionAttribute(collectionAlias, "description", newDescription, apiToken); + changeAttributeResp.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("message.message", equalTo("Update 
successful")); + + // Change the affiliation of the collection: + + String newAffiliation = "Renamed Affiliation"; + changeAttributeResp = UtilIT.setCollectionAttribute(collectionAlias, "affiliation", newAffiliation, apiToken); + changeAttributeResp.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("message.message", equalTo("Update successful")); + + // Cannot update filePIDsEnabled from a regular user: + + changeAttributeResp = UtilIT.setCollectionAttribute(collectionAlias, "filePIDsEnabled", "true", apiToken); + changeAttributeResp.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()); + + // Change the alias of the collection: + + changeAttributeResp = UtilIT.setCollectionAttribute(collectionAlias, "alias", newCollectionAlias, apiToken); + changeAttributeResp.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("message.message", equalTo("Update successful")); + + // Check on the collection, under the new alias: + Response collectionInfoResponse = UtilIT.exportDataverse(newCollectionAlias, apiToken); - collectionInfoResponse.prettyPrint(); - collectionInfoResponse.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data.alias", equalTo(newCollectionAlias)); - + .body("data.alias", equalTo(newCollectionAlias)) + .body("data.name", equalTo(newCollectionName)) + .body("data.description", equalTo(newDescription)) + .body("data.affiliation", equalTo(newAffiliation)); + // Delete the collection (again, using its new alias): - + Response deleteCollectionResponse = UtilIT.deleteDataverse(newCollectionAlias, apiToken); - deleteCollectionResponse.prettyPrint(); assertEquals(OK.getStatusCode(), deleteCollectionResponse.getStatusCode()); + + // Cannot update root collection from a regular user: + + changeAttributeResp = UtilIT.setCollectionAttribute("root", "name", newCollectionName, apiToken); + changeAttributeResp.then().assertThat() + .statusCode(UNAUTHORIZED.getStatusCode()); + + collectionInfoResponse = 
UtilIT.exportDataverse("root", apiToken); + + collectionInfoResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.name", equalTo("Root")); } @Test @@ -699,6 +918,17 @@ public void testListMetadataBlocks() { createDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + // New Dataverse should return just the citation block and its displayOnCreate fields when onlyDisplayedOnCreate=true and returnDatasetFieldTypes=true + Response listMetadataBlocks = UtilIT.listMetadataBlocks(dataverseAlias, true, true, apiToken); + listMetadataBlocks.prettyPrint(); + listMetadataBlocks.then().assertThat().statusCode(OK.getStatusCode()); + listMetadataBlocks.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.size()", equalTo(1)) + .body("data[0].name", is("citation")) + .body("data[0].fields.title.displayOnCreate", equalTo(true)) + .body("data[0].fields.size()", is(28)); + Response setMetadataBlocksResponse = UtilIT.setMetadataBlocks(dataverseAlias, Json.createArrayBuilder().add("citation").add("astrophysics"), apiToken); setMetadataBlocksResponse.then().assertThat().statusCode(OK.getStatusCode()); @@ -1080,6 +1310,111 @@ public void testAddDataverse() { .body("message", equalTo("Invalid metadata block name: \"" + invalidMetadataBlockName + "\"")); } + @Test + public void testUpdateDataverse() { + Response createUser = UtilIT.createRandomUser(); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + String testAliasSuffix = "-update-dataverse"; + + String testDataverseAlias = UtilIT.getRandomDvAlias() + testAliasSuffix; + Response createSubDataverseResponse = UtilIT.createSubDataverse(testDataverseAlias, null, apiToken, "root"); + createSubDataverseResponse.then().assertThat().statusCode(CREATED.getStatusCode()); + + String newAlias = UtilIT.getRandomDvAlias() + testAliasSuffix; + String newName = "New Test Dataverse Name"; + String 
newAffiliation = "New Test Dataverse Affiliation"; + String newDataverseType = Dataverse.DataverseType.TEACHING_COURSES.toString(); + String[] newContactEmails = new String[]{"new_email@dataverse.com"}; + String[] newInputLevelNames = new String[]{"geographicCoverage"}; + String[] newFacetIds = new String[]{"contributorName"}; + String[] newMetadataBlockNames = new String[]{"citation", "geospatial", "biomedical"}; + + Response updateDataverseResponse = UtilIT.updateDataverse( + testDataverseAlias, + newAlias, + newName, + newAffiliation, + newDataverseType, + newContactEmails, + newInputLevelNames, + newFacetIds, + newMetadataBlockNames, + apiToken + ); + + // Assert dataverse properties are updated + updateDataverseResponse.then().assertThat().statusCode(OK.getStatusCode()); + String actualDataverseAlias = updateDataverseResponse.then().extract().path("data.alias"); + assertEquals(newAlias, actualDataverseAlias); + String actualDataverseName = updateDataverseResponse.then().extract().path("data.name"); + assertEquals(newName, actualDataverseName); + String actualDataverseAffiliation = updateDataverseResponse.then().extract().path("data.affiliation"); + assertEquals(newAffiliation, actualDataverseAffiliation); + String actualDataverseType = updateDataverseResponse.then().extract().path("data.dataverseType"); + assertEquals(newDataverseType, actualDataverseType); + String actualContactEmail = updateDataverseResponse.then().extract().path("data.dataverseContacts[0].contactEmail"); + assertEquals("new_email@dataverse.com", actualContactEmail); + + // Assert metadata blocks are updated + Response listMetadataBlocksResponse = UtilIT.listMetadataBlocks(newAlias, false, false, apiToken); + String actualDataverseMetadataBlock1 = listMetadataBlocksResponse.then().extract().path("data[0].name"); + String actualDataverseMetadataBlock2 = listMetadataBlocksResponse.then().extract().path("data[1].name"); + String actualDataverseMetadataBlock3 = 
listMetadataBlocksResponse.then().extract().path("data[2].name"); + assertThat(newMetadataBlockNames, hasItemInArray(actualDataverseMetadataBlock1)); + assertThat(newMetadataBlockNames, hasItemInArray(actualDataverseMetadataBlock2)); + assertThat(newMetadataBlockNames, hasItemInArray(actualDataverseMetadataBlock3)); + + // Assert custom facets are updated + Response listDataverseFacetsResponse = UtilIT.listDataverseFacets(newAlias, apiToken); + String actualFacetName = listDataverseFacetsResponse.then().extract().path("data[0]"); + assertThat(newFacetIds, hasItemInArray(actualFacetName)); + + // Assert input levels are updated + Response listDataverseInputLevelsResponse = UtilIT.listDataverseInputLevels(newAlias, apiToken); + String actualInputLevelName = listDataverseInputLevelsResponse.then().extract().path("data[0].datasetFieldTypeName"); + assertThat(newInputLevelNames, hasItemInArray(actualInputLevelName)); + + // The alias has been changed, so we should not be able to do any operation using the old one + String oldDataverseAlias = testDataverseAlias; + Response getDataverseResponse = UtilIT.listDataverseFacets(oldDataverseAlias, apiToken); + getDataverseResponse.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + // Should return error when the dataverse to edit does not exist + updateDataverseResponse = UtilIT.updateDataverse( + "unexistingDataverseAlias", + newAlias, + newName, + newAffiliation, + newDataverseType, + newContactEmails, + newInputLevelNames, + newFacetIds, + newMetadataBlockNames, + apiToken + ); + updateDataverseResponse.then().assertThat().statusCode(NOT_FOUND.getStatusCode()); + + // User with unprivileged API token cannot update Root dataverse + updateDataverseResponse = UtilIT.updateDataverse( + "root", + newAlias, + newName, + newAffiliation, + newDataverseType, + newContactEmails, + newInputLevelNames, + newFacetIds, + newMetadataBlockNames, + apiToken + ); + 
updateDataverseResponse.then().assertThat().statusCode(UNAUTHORIZED.getStatusCode()); + + Response rootCollectionInfoResponse = UtilIT.exportDataverse("root", apiToken); + rootCollectionInfoResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.name", equalTo("Root")); + } + @Test public void testListFacets() { Response createUserResponse = UtilIT.createRandomUser(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java index 0153d8dc893..6e7061961f0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MetadataBlocksIT.java @@ -51,7 +51,7 @@ void testListMetadataBlocks() { // onlyDisplayedOnCreate=true and returnDatasetFieldTypes=true listMetadataBlocksResponse = UtilIT.listMetadataBlocks(true, true); - expectedNumberOfMetadataFields = 26; + expectedNumberOfMetadataFields = 28; listMetadataBlocksResponse.then().assertThat() .statusCode(OK.getStatusCode()) .body("data[0].fields", not(equalTo(null))) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java index f7135ce7f3b..8951b0bd42e 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/MoveIT.java @@ -278,8 +278,8 @@ public void testMoveLinkedDataset() { .body("message", equalTo("Use the query parameter forceMove=true to complete the move. This dataset is linked to the new host dataverse or one of its parents. This move would remove the link to this dataset. 
")); JsonObject linksBeforeData = Json.createReader(new StringReader(getLinksBefore.asString())).readObject(); - assertEquals("OK", linksBeforeData.getString("status")); - assertEquals(dataverse2Alias + " (id " + dataverse2Id + ")", linksBeforeData.getJsonObject("data").getJsonArray("dataverses that link to dataset id " + datasetId).getString(0)); + assertEquals(datasetId, linksBeforeData.getJsonObject("data").getInt("id")); + assertEquals(dataverse2Id, linksBeforeData.getJsonObject("data").getJsonArray("linked-dataverses").get(0).asJsonObject().getInt("id")); boolean forceMove = true; Response forceMoveLinkedDataset = UtilIT.moveDataset(datasetId.toString(), dataverse2Alias, forceMove, superuserApiToken); @@ -308,8 +308,7 @@ public void testMoveLinkedDataset() { JsonObject linksAfterData = Json.createReader(new StringReader(getLinksAfter.asString())).readObject(); assertEquals("OK", linksAfterData.getString("status")); - assertEquals(0, linksAfterData.getJsonObject("data").getJsonArray("dataverses that link to dataset id " + datasetId).size()); - + assertEquals(0, linksAfterData.getJsonObject("data").getJsonArray("linked-dataverses").size()); } @Test diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index 6058ab17d72..1d63173f5b7 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -1629,8 +1629,8 @@ public void testSearchFilesAndUrlImages() { .statusCode(200); pathToFile = "src/main/webapp/resources/js/mydata.js"; Response uploadFile = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken); - uploadImage.prettyPrint(); - uploadImage.then().assertThat() + uploadFile.prettyPrint(); + uploadFile.then().assertThat() .statusCode(200); Response publishDataverse = UtilIT.publishDataverseViaSword(dataverseAlias, apiToken); @@ -1658,7 +1658,7 @@ public void testSearchFilesAndUrlImages() { 
.statusCode(OK.getStatusCode()) .body("data.items[0].type", CoreMatchers.is("dataverse")) .body("data.items[0].url", CoreMatchers.containsString("/dataverse/")) - .body("data.items[0]", CoreMatchers.not(CoreMatchers.hasItem("url_image"))); + .body("data.items[0]", CoreMatchers.not(CoreMatchers.hasItem("image_url"))); searchResp = UtilIT.search("mydata", apiToken); searchResp.prettyPrint(); @@ -1666,6 +1666,6 @@ public void testSearchFilesAndUrlImages() { .statusCode(OK.getStatusCode()) .body("data.items[0].type", CoreMatchers.is("file")) .body("data.items[0].url", CoreMatchers.containsString("/datafile/")) - .body("data.items[0]", CoreMatchers.not(CoreMatchers.hasItem("url_image"))); + .body("data.items[0]", CoreMatchers.not(CoreMatchers.hasItem("image_url"))); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UsersIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UsersIT.java index 0189ffd6e58..ce3b8bf75ff 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UsersIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UsersIT.java @@ -23,6 +23,8 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.Matchers.contains; import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.hamcrest.CoreMatchers; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -371,29 +373,38 @@ public void testAPITokenEndpoints() { .body("data.message", containsString(userApiToken)) .body("data.message", containsString("expires on")); + // Recreate given a bad API token Response recreateToken = UtilIT.recreateToken("BAD-Token-blah-89234"); recreateToken.prettyPrint(); recreateToken.then().assertThat() .statusCode(UNAUTHORIZED.getStatusCode()); + // Recreate given a valid API token recreateToken = UtilIT.recreateToken(userApiToken); recreateToken.prettyPrint(); recreateToken.then().assertThat() .statusCode(OK.getStatusCode()) - .body("data.message", containsString("New token for")); + .body("data.message", 
containsString("New token for")) + .body("data.message", CoreMatchers.not(containsString("and expires on"))); + // Recreate given a valid API token and returning expiration createUser = UtilIT.createRandomUser(); - createUser.prettyPrint(); - assertEquals(200, createUser.getStatusCode()); + assertEquals(OK.getStatusCode(), createUser.getStatusCode()); + + userApiToken = UtilIT.getApiTokenFromResponse(createUser); + + recreateToken = UtilIT.recreateToken(userApiToken, true); + recreateToken.prettyPrint(); + recreateToken.then().assertThat() + .statusCode(OK.getStatusCode()) + .body("data.message", containsString("New token for")) + .body("data.message", containsString("and expires on")); - String userApiTokenForDelete = UtilIT.getApiTokenFromResponse(createUser); - /* Add tests for Private URL */ createUser = UtilIT.createRandomUser(); - String username = UtilIT.getUsernameFromResponse(createUser); String apiToken = UtilIT.getApiTokenFromResponse(createUser); Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); createDataverseResponse.prettyPrint(); @@ -416,8 +427,12 @@ public void testAPITokenEndpoints() { getExpiration = UtilIT.getTokenExpiration(tokenForPrivateUrlUser); getExpiration.prettyPrint(); getExpiration.then().assertThat() - .statusCode(NOT_FOUND.getStatusCode()); + .statusCode(UNAUTHORIZED.getStatusCode()); + createUser = UtilIT.createRandomUser(); + assertEquals(OK.getStatusCode(), createUser.getStatusCode()); + + String userApiTokenForDelete = UtilIT.getApiTokenFromResponse(createUser); Response deleteToken = UtilIT.deleteToken(userApiTokenForDelete); deleteToken.prettyPrint(); diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 4e20e8e4c33..502f1ecb0a8 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse.api; +import 
edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder; import io.restassured.http.ContentType; import io.restassured.path.json.JsonPath; import io.restassured.response.Response; @@ -12,6 +13,7 @@ import jakarta.json.JsonArrayBuilder; import jakarta.json.JsonObject; +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; import static jakarta.ws.rs.core.Response.Status.CREATED; import java.nio.charset.StandardCharsets; @@ -389,6 +391,48 @@ static Response createSubDataverse(String alias, String category, String apiToke objectBuilder.add("affiliation", affiliation); } + updateDataverseRequestJsonWithMetadataBlocksConfiguration(inputLevelNames, facetIds, metadataBlockNames, objectBuilder); + + JsonObject dvData = objectBuilder.build(); + return given() + .body(dvData.toString()).contentType(ContentType.JSON) + .when().post("/api/dataverses/" + parentDV + "?key=" + apiToken); + } + + static Response updateDataverse(String alias, + String newAlias, + String newName, + String newAffiliation, + String newDataverseType, + String[] newContactEmails, + String[] newInputLevelNames, + String[] newFacetIds, + String[] newMetadataBlockNames, + String apiToken) { + JsonArrayBuilder contactArrayBuilder = Json.createArrayBuilder(); + for(String contactEmail : newContactEmails) { + contactArrayBuilder.add(Json.createObjectBuilder().add("contactEmail", contactEmail)); + } + NullSafeJsonBuilder jsonBuilder = jsonObjectBuilder() + .add("alias", newAlias) + .add("name", newName) + .add("affiliation", newAffiliation) + .add("dataverseContacts", contactArrayBuilder) + .add("dataverseType", newDataverseType) + .add("affiliation", newAffiliation); + + updateDataverseRequestJsonWithMetadataBlocksConfiguration(newInputLevelNames, newFacetIds, newMetadataBlockNames, jsonBuilder); + + JsonObject dvData = jsonBuilder.build(); + return given() + .body(dvData.toString()).contentType(ContentType.JSON) + .when().put("/api/dataverses/" + alias + "?key=" + apiToken); 
+ } + + private static void updateDataverseRequestJsonWithMetadataBlocksConfiguration(String[] inputLevelNames, + String[] facetIds, + String[] metadataBlockNames, + JsonObjectBuilder objectBuilder) { JsonObjectBuilder metadataBlocksObjectBuilder = Json.createObjectBuilder(); if (inputLevelNames != null) { @@ -420,12 +464,6 @@ static Response createSubDataverse(String alias, String category, String apiToke } objectBuilder.add("metadataBlocks", metadataBlocksObjectBuilder); - - JsonObject dvData = objectBuilder.build(); - Response createDataverseResponse = given() - .body(dvData.toString()).contentType(ContentType.JSON) - .when().post("/api/dataverses/" + parentDV + "?key=" + apiToken); - return createDataverseResponse; } static Response createDataverse(JsonObject dvData, String apiToken) { @@ -2813,10 +2851,15 @@ static Response getTokenExpiration( String apiToken) { return response; } - static Response recreateToken( String apiToken) { + static Response recreateToken(String apiToken) { + return recreateToken(apiToken, false); + } + + static Response recreateToken(String apiToken, boolean returnExpiration) { Response response = given() - .header(API_TOKEN_HTTP_HEADER, apiToken) - .post("api/users/token/recreate"); + .header(API_TOKEN_HTTP_HEADER, apiToken) + .queryParam("returnExpiration", returnExpiration) + .post("api/users/token/recreate"); return response; } @@ -3672,6 +3715,35 @@ static Response importDatasetDDIViaNativeApi(String apiToken, String dataverseAl return importDDI.post(postString); } + + static Response importDatasetViaNativeApi(String apiToken, String dataverseAlias, String json, String pid, String release) { + String postString = "/api/dataverses/" + dataverseAlias + "/datasets/:import"; + if (pid != null || release != null ) { + //postString = postString + "?"; + if (pid != null) { + postString = postString + "?pid=" + pid; + if (release != null && release.compareTo("yes") == 0) { + postString = postString + "&release=" + release; + } + } else { 
+ if (release != null && release.compareTo("yes") == 0) { + postString = postString + "?release=" + release; + } + } + } + logger.info("Here importDatasetViaNativeApi"); + logger.info(postString); + + RequestSpecification importJSON = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .urlEncodingEnabled(false) + .body(json) + .contentType("application/json"); + + return importJSON.post(postString); + } + + static Response retrieveMyDataAsJsonString(String apiToken, String userIdentifier, ArrayList roleIds) { Response response = given() .header(API_TOKEN_HTTP_HEADER, apiToken) diff --git a/src/test/java/edu/harvard/iq/dataverse/authorization/AuthenticationProviderTest.java b/src/test/java/edu/harvard/iq/dataverse/authorization/AuthenticationProviderTest.java index eac9a605c9e..d4d7b6fa69d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/authorization/AuthenticationProviderTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/authorization/AuthenticationProviderTest.java @@ -15,7 +15,7 @@ public class AuthenticationProviderTest { - private final static String[] authProviders = {"null", "builtin", "github", "google", "orcid", "orcid-sandbox", "shib"}; + private static final String[] authProviders = {"null", "builtin", "github", "google", "orcid", "orcid-sandbox", "shib"}; private static Map bundleTestMap; @BeforeAll diff --git a/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java new file mode 100644 index 00000000000..f49ebcea39c --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/engine/command/impl/CreateNewDataFilesTest.java @@ -0,0 +1,264 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import 
edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.storageuse.UploadSessionQuotaLimit; +import edu.harvard.iq.dataverse.util.JhoveFileType; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; +import org.jetbrains.annotations.NotNull; +import org.joda.time.DateTime; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.mockito.MockedStatic; +import org.mockito.Mockito; + +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.PrintStream; +import java.nio.file.Files; +import java.nio.file.NoSuchFileException; +import java.nio.file.Path; +import java.security.SecureRandom; +import java.text.MessageFormat; +import java.util.zip.ZipEntry; +import java.util.zip.ZipOutputStream; + +import static edu.harvard.iq.dataverse.DataFile.ChecksumType.MD5; +import static org.apache.commons.io.file.FilesUncheck.createDirectories; +import static org.apache.commons.io.file.PathUtils.deleteDirectory; +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.ArgumentMatchers.any; + + +@LocalJvmSettings +public class CreateNewDataFilesTest { + // TODO keep constants for annotations in sync with class name + Path testDir = Path.of("target/test/").resolve(getClass().getSimpleName()); + PrintStream original_stderr; + + @BeforeEach + public void cleanTmpDir() throws IOException { + original_stderr = System.err; + if(testDir.toFile().exists()) + deleteDirectory(testDir); + } + + @AfterEach void 
restoreStderr() { + System.setErr(original_stderr); + } + + @Test + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") + public void execute_fails_to_upload_when_tmp_does_not_exist() throws FileNotFoundException { + + mockTmpLookup(); + var cmd = createCmd("scripts/search/data/shape/shapefile.zip", mockDatasetVersion(), 1000L, 500L); + var ctxt = mockCommandContext(mockSysConfig(true, 0L, MD5, 10)); + + assertThatThrownBy(() -> cmd.execute(ctxt)) + .isInstanceOf(CommandException.class) + .hasMessageContaining("Failed to save the upload as a temp file (temp disk space?)") + .hasRootCauseInstanceOf(NoSuchFileException.class) + .getRootCause() + .hasMessageStartingWith("target/test/CreateNewDataFilesTest/tmp/temp/tmp"); + } + + @Test + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") + public void execute_fails_on_size_limit() throws Exception { + createDirectories(Path.of("target/test/CreateNewDataFilesTest/tmp/temp")); + + mockTmpLookup(); + var cmd = createCmd("scripts/search/data/binary/3files.zip", mockDatasetVersion(), 1000L, 500L); + var ctxt = mockCommandContext(mockSysConfig(true, 50L, MD5, 0)); + try (var mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { + mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); + + assertThatThrownBy(() -> cmd.execute(ctxt)) + .isInstanceOf(CommandException.class) + .hasMessage("This file size (462 B) exceeds the size limit of 50 B."); + } + } + + @Test + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") + public void execute_loads_individual_files_from_uploaded_zip() throws Exception { + var tempDir = testDir.resolve("tmp/temp"); + createDirectories(tempDir); + + mockTmpLookup(); + var cmd = createCmd("src/test/resources/own-cloud-downloads/greetings.zip", mockDatasetVersion(), 1000L, 500L); + var ctxt = mockCommandContext(mockSysConfig(false, 
1000000L, MD5, 10)); + try (MockedStatic mockedStatic = Mockito.mockStatic(JhoveFileType.class)) { + mockedStatic.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); + + // the test + var result = cmd.execute(ctxt); + + assertThat(result.getErrors()).hasSize(0); + assertThat(result.getDataFiles().stream().map(dataFile -> + dataFile.getFileMetadata().getDirectoryLabel() + "/" + dataFile.getDisplayName() + )).containsExactlyInAnyOrder( + "DD-1576/goodbye.txt", "DD-1576/hello.txt" + ); + var storageIds = result.getDataFiles().stream().map(DataFile::getStorageIdentifier).toList(); + assertThat(tempDir.toFile().list()) + .containsExactlyInAnyOrderElementsOf(storageIds); + } + } + + @Test + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "target/test/CreateNewDataFilesTest/tmp") + public void execute_rezips_sets_of_shape_files_from_uploaded_zip() throws Exception { + var tempDir = testDir.resolve("tmp/temp"); + createDirectories(tempDir); + + mockTmpLookup(); + var cmd = createCmd("src/test/resources/own-cloud-downloads/shapes.zip", mockDatasetVersion(), 1000L, 500L); + var ctxt = mockCommandContext(mockSysConfig(false, 100000000L, MD5, 10)); + try (var mockedJHoveFileType = Mockito.mockStatic(JhoveFileType.class)) { + mockedJHoveFileType.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); + + // the test + var result = cmd.execute(ctxt); + + assertThat(result.getErrors()).hasSize(0); + assertThat(result.getDataFiles().stream().map(dataFile -> + (dataFile.getFileMetadata().getDirectoryLabel() + "/" + dataFile.getDisplayName()) + .replaceAll(".*/dataDir/", "") + )).containsExactlyInAnyOrder( + "shape1.zip", + "shape2/shape2", + "shape2/shape2.pdf", + "shape2/shape2.txt", + "shape2/shape2.zip", + "extra/shp_dictionary.xls", + "extra/notes", + "extra/README.MD" + ); + var storageIds = result.getDataFiles().stream().map(DataFile::getStorageIdentifier).toList(); + assertThat(tempDir.toFile().list()) + 
.containsExactlyInAnyOrderElementsOf(storageIds); + } + } + + @Disabled("Too slow. Intended for manual execution.") + @Test + @JvmSetting(key = JvmSettings.FILES_DIRECTORY, value = "/tmp/test/CreateNewDataFilesTest/tmp") + public void extract_zip_performance() throws Exception { + /* + Developed to test performance difference between the old implementation with ZipInputStream and the new ZipFile implementation. + Play with numbers depending on: + - the time you want to spend on this test + - how much system stress you want to examine + */ + var nrOfZipFiles = 20; + var avgNrOfFilesPerZip = 300; + var avgFileLength = 5000; + + var tmpUploadStorage = Path.of("/tmp/test/CreateNewDataFilesTest/tmp/temp"); + if(tmpUploadStorage.toFile().exists()) { + deleteDirectory(tmpUploadStorage); + } + createDirectories(tmpUploadStorage); // temp in target would choke intellij + + var chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; + var random = new SecureRandom(); + var totalNrOfFiles = 0; + var totalFileSize = 0; + var totalTime = 0L; + var tmp = Path.of(Files.createTempDirectory(null).toString()); + var ctxt = mockCommandContext(mockSysConfig(false, 100000000L, MD5, 10000)); + try (var mockedJHoveFileType = Mockito.mockStatic(JhoveFileType.class)) { + mockedJHoveFileType.when(JhoveFileType::getJhoveConfigFile).thenReturn("conf/jhove/jhove.conf"); + for (var zipNr = 1; zipNr <= nrOfZipFiles; zipNr++) { + // build the zip + var zip = tmp.resolve(zipNr + "-data.zip"); + var nrOfFilesInZip = random.nextInt(avgNrOfFilesPerZip * 2); + try (var zipStream = new ZipOutputStream(new FileOutputStream(zip.toFile()))) { + for (var fileInZipNr = 1; fileInZipNr <= nrOfFilesInZip; fileInZipNr++) { + // build content for a file + var stringLength = random.nextInt(avgFileLength * 2 -5); + StringBuilder sb = new StringBuilder(stringLength); + for (int i = 1; i <= stringLength; i++) {// zero length causes buffer underflow + 
sb.append(chars.charAt(random.nextInt(chars.length()))); + } + // add the file to the zip + zipStream.putNextEntry(new ZipEntry(fileInZipNr + ".txt")); + zipStream.write((sb.toString()).getBytes()); + zipStream.closeEntry(); + totalFileSize += stringLength; + } + } + + // upload the zip + var before = DateTime.now(); + var result = createCmd(zip.toString(), mockDatasetVersion(), 1000L, 500L) + .execute(ctxt); + totalTime += DateTime.now().getMillis() - before.getMillis(); + + assertThat(result.getErrors()).hasSize(0); + assertThat(result.getDataFiles()).hasSize(nrOfFilesInZip); + totalNrOfFiles += nrOfFilesInZip; + + // report after each zip to have some data even when aborting a test that takes too long + System.out.println(MessageFormat.format( + "Total time: {0}ms; nr of zips {1} total nr of files {2}; total file size {3}", + totalTime, zipNr, totalNrOfFiles, totalFileSize + )); + } + assertThat(tmpUploadStorage.toFile().list()).hasSize(totalNrOfFiles); + } + } + + private static @NotNull CreateNewDataFilesCommand createCmd(String name, DatasetVersion dsVersion, long allocatedQuotaLimit, long usedQuotaLimit) throws FileNotFoundException { + return new CreateNewDataFilesCommand( + Mockito.mock(DataverseRequest.class), + dsVersion, + new FileInputStream(name), + "example.zip", + "application/zip", + null, + new UploadSessionQuotaLimit(allocatedQuotaLimit, usedQuotaLimit), + "sha"); + } + + private static @NotNull CommandContext mockCommandContext(SystemConfig sysCfg) { + var ctxt = Mockito.mock(CommandContext.class); + Mockito.when(ctxt.systemConfig()).thenReturn(sysCfg); + return ctxt; + } + + private static @NotNull SystemConfig mockSysConfig(boolean isStorageQuataEnforced, long maxFileUploadSizeForStore, DataFile.ChecksumType checksumType, int zipUploadFilesLimit) { + var sysCfg = Mockito.mock(SystemConfig.class); + Mockito.when(sysCfg.isStorageQuotasEnforced()).thenReturn(isStorageQuataEnforced); + 
Mockito.when(sysCfg.getMaxFileUploadSizeForStore(any())).thenReturn(maxFileUploadSizeForStore); + Mockito.when(sysCfg.getFileFixityChecksumAlgorithm()).thenReturn(checksumType); + Mockito.when(sysCfg.getZipUploadFilesLimit()).thenReturn(zipUploadFilesLimit); + return sysCfg; + } + + private static void mockTmpLookup() { + JvmSettings mockFilesDirectory = Mockito.mock(JvmSettings.class); + Mockito.when(mockFilesDirectory.lookup()).thenReturn("/mocked/path"); + } + + private static @NotNull DatasetVersion mockDatasetVersion() { + var dsVersion = Mockito.mock(DatasetVersion.class); + Mockito.when(dsVersion.getDataset()).thenReturn(Mockito.mock(Dataset.class)); + return dsVersion; + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java index 58d69da743b..bacb231b4d5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/PidUtilTest.java @@ -99,7 +99,7 @@ @JvmSetting(key = JvmSettings.PID_PROVIDER_LABEL, value = "FAKE 1", varArgs = "fake1") @JvmSetting(key = JvmSettings.PID_PROVIDER_TYPE, value = FakeDOIProvider.TYPE, varArgs = "fake1") @JvmSetting(key = JvmSettings.PID_PROVIDER_AUTHORITY, value = "10.5074", varArgs = "fake1") -@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "FK", varArgs = "fake1") +@JvmSetting(key = JvmSettings.PID_PROVIDER_SHOULDER, value = "fk", varArgs = "fake1") @JvmSetting(key = JvmSettings.PID_PROVIDER_MANAGED_LIST, value = "doi:10.5073/FK3ABCDEF", varArgs ="fake1") //HANDLE 1 @@ -250,9 +250,12 @@ public void testDOIParsing() throws IOException { assertEquals(pid1String, pid3.asString()); assertEquals("dc1", pid3.getProviderId()); - String pid4String = "doi:10.5072/FK3ABCDEF"; + //Also test case insensitive + String pid4String = "doi:10.5072/fk3ABCDEF"; GlobalId pid4 = PidUtil.parseAsGlobalID(pid4String); - assertEquals(pid4String, 
pid4.asString()); + // Lower case is recognized by converting to upper case internally, so we need to test vs. the upper case identifier + // I.e. we are verifying that the lower case string is parsed the same as the upper case string, both give an internal upper case PID representation + assertEquals("doi:10.5072/FK3ABCDEF", pid4.asString()); assertEquals("dc2", pid4.getProviderId()); String pid5String = "doi:10.5072/FK2ABCDEF"; @@ -312,6 +315,13 @@ public void testUnmanagedParsing() throws IOException { GlobalId pid6 = PidUtil.parseAsGlobalID(pid6String); assertEquals(pid6String, pid6.asString()); assertEquals(UnmanagedPermaLinkPidProvider.ID, pid6.getProviderId()); + + //Lowercase test for unmanaged DOIs + String pid7String = "doi:10.5281/zenodo.6381129"; + GlobalId pid7 = PidUtil.parseAsGlobalID(pid7String); + assertEquals(UnmanagedDOIProvider.ID, pid7.getProviderId()); + assertEquals(pid7String.toUpperCase().replace("DOI", "doi"), pid7.asString()); + } @@ -350,15 +360,15 @@ public void testExcludedSetParsing() throws IOException { @Test public void testManagedSetParsing() throws IOException { - String pid1String = "doi:10.5073/FK3ABCDEF"; + String pid1String = "doi:10.5073/fk3ABCDEF"; GlobalId pid2 = PidUtil.parseAsGlobalID(pid1String); - assertEquals(pid1String, pid2.asString()); + assertEquals(pid1String.toUpperCase().replace("DOI", "doi"), pid2.asString()); assertEquals("fake1", pid2.getProviderId()); assertEquals("https://doi.org/" + pid2.getAuthority() + PidUtil.getPidProvider(pid2.getProviderId()).getSeparator() + pid2.getIdentifier(),pid2.asURL()); assertEquals("10.5073", pid2.getAuthority()); assertEquals(AbstractDOIProvider.DOI_PROTOCOL, pid2.getProtocol()); GlobalId pid3 = PidUtil.parseAsGlobalID(pid2.asURL()); - assertEquals(pid1String, pid3.asString()); + assertEquals(pid1String.toUpperCase().replace("DOI", "doi"), pid3.asString()); assertEquals("fake1", pid3.getProviderId()); 
assertFalse(PidUtil.getPidProvider(pid3.getProviderId()).canCreatePidsLike(pid3)); diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java index c03146904de..2bd6818821d 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java @@ -1,15 +1,21 @@ package edu.harvard.iq.dataverse.pidproviders.doi.datacite; +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.DataCitation; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetAuthor; import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetFieldValue; import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetVersion.VersionState; +import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.MetadataBlock; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.dataset.DatasetType; @@ -20,16 +26,30 @@ import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.CompoundVocabularyException; +import edu.harvard.iq.dataverse.util.json.ControlledVocabularyException; +import edu.harvard.iq.dataverse.util.json.JsonParseException; +import 
edu.harvard.iq.dataverse.util.json.JsonParser; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; import edu.harvard.iq.dataverse.util.xml.XmlValidator; +import jakarta.json.JsonArray; +import jakarta.json.JsonObject; +import jakarta.json.JsonString; +import java.io.File; import java.io.IOException; import java.io.StringReader; import java.net.URL; +import java.nio.file.Files; +import java.nio.file.Paths; import java.util.ArrayList; +import java.util.HashMap; +import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Set; import javax.xml.transform.stream.StreamSource; @@ -73,6 +93,8 @@ public static void setupMocks() { } /** + * A minimal example to assure that the XMLMetadataTemplate generates output + * consistent with the DataCite XML v4.5 schema. */ @Test public void testDataCiteXMLCreation() throws IOException { @@ -106,7 +128,7 @@ public void testDataCiteXMLCreation() throws IOException { doiMetadata.setAuthors(authors); doiMetadata.setPublisher("Dataverse"); XmlMetadataTemplate template = new XmlMetadataTemplate(doiMetadata); - + Dataset d = new Dataset(); GlobalId doi = new GlobalId("doi", "10.5072", "FK2/ABCDEF", null, null, null); d.setGlobalId(doi); @@ -135,15 +157,291 @@ public void testDataCiteXMLCreation() throws IOException { d.setDatasetType(dType); String xml = template.generateXML(d); - System.out.println("Output is " + xml); + System.out.println("Output from minimal example is " + xml); try { StreamSource source = new StreamSource(new StringReader(xml)); source.setSystemId("DataCite XML for test dataset"); - assertTrue(XmlValidator.validateXmlSchema(source, new URL("https://schema.datacite.org/meta/kernel-4/metadata.xsd"))); + assertTrue(XmlValidator.validateXmlSchema(source, + new URL("https://schema.datacite.org/meta/kernel-4/metadata.xsd"))); } catch (SAXException e) { 
System.out.println("Invalid schema: " + e.getMessage()); } - + + } + + /** + * This tests a more complete example based off of the dataset-all-defaults + * file, again checking for conformance of the result with the DataCite XML v4.5 + * schema. + */ + @Test + public void testDataCiteXMLCreationAllFields() throws IOException { + Dataverse collection = new Dataverse(); + collection.setCitationDatasetFieldTypes(new ArrayList<>()); + Dataset d = new Dataset(); + d.setOwner(collection); + DatasetVersion dv = new DatasetVersion(); + TermsOfUseAndAccess toa = new TermsOfUseAndAccess(); + toa.setTermsOfUse("Some terms"); + dv.setTermsOfUseAndAccess(toa); + dv.setDataset(d); + DatasetFieldType primitiveDSFType = new DatasetFieldType(DatasetFieldConstant.title, + DatasetFieldType.FieldType.TEXT, false); + DatasetField testDatasetField = new DatasetField(); + + dv.setVersionState(VersionState.DRAFT); + + testDatasetField.setDatasetVersion(dv); + + File datasetVersionJson = new File("src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt"); + String datasetVersionAsJson = new String(Files.readAllBytes(Paths.get(datasetVersionJson.getAbsolutePath()))); + JsonObject datasetJson = JsonUtil.getJsonObject(datasetVersionAsJson); + + GlobalId doi = new GlobalId("doi", datasetJson.getString("authority"), datasetJson.getString("identifier"), + null, null, null); + d.setGlobalId(doi); + + List fields = assertDoesNotThrow(() -> XmlMetadataTemplateTest + .parseMetadataBlocks(datasetJson.getJsonObject("datasetVersion").getJsonObject("metadataBlocks"))); + dv.setDatasetFields(fields); + + ArrayList dsvs = new ArrayList<>(); + dsvs.add(0, dv); + d.setVersions(dsvs); + DatasetType dType = new DatasetType(); + dType.setName(DatasetType.DATASET_TYPE_DATASET); + d.setDatasetType(dType); + String xml = DOIDataCiteRegisterService.getMetadataFromDvObject(dv.getDataset().getGlobalId().asString(), + new DataCitation(dv).getDataCiteMetadata(), dv.getDataset()); + 
System.out.println("Output from dataset-all-defaults is " + xml); + try { + StreamSource source = new StreamSource(new StringReader(xml)); + source.setSystemId("DataCite XML for test dataset"); + assertTrue(XmlValidator.validateXmlSchema(source, + new URL("https://schema.datacite.org/meta/kernel-4/metadata.xsd"))); + } catch (SAXException e) { + System.out.println("Invalid schema: " + e.getMessage()); + } + + } + + /** + * Mock Utility Methods - These methods support importing DatasetFields from the + * Dataverse JSON export format. They assume that any DatasetFieldType + * referenced exists, that any Controlled Vocabulary value exists, etc. which + * avoids having to do database lookups or read metadatablock tsv files. They + * are derived from the JsonParser methods of the same names with any db + * references and DatasetFieldType-related error checking removed. + */ + public static List parseMetadataBlocks(JsonObject json) throws JsonParseException { + + Map existingTypes = new HashMap<>(); + + Set keys = json.keySet(); + List fields = new LinkedList<>(); + + for (String blockName : keys) { + MetadataBlock block = new MetadataBlock(); + block.setName(blockName); + JsonObject blockJson = json.getJsonObject(blockName); + JsonArray fieldsJson = blockJson.getJsonArray("fields"); + fields.addAll(parseFieldsFromArray(fieldsJson, true, block, existingTypes)); + } + return fields; + } + + private static List parseFieldsFromArray(JsonArray fieldsArray, Boolean testType, MetadataBlock block, + Map existingTypes) throws JsonParseException { + List fields = new LinkedList<>(); + for (JsonObject fieldJson : fieldsArray.getValuesAs(JsonObject.class)) { + + DatasetField field = parseField(fieldJson, testType, block, existingTypes); + if (field != null) { + fields.add(field); + } + + } + return fields; + + } + + public static DatasetField parseField(JsonObject json, Boolean testType, MetadataBlock block, + Map existingTypes) throws JsonParseException { + if (json == null) { + 
return null; + } + + DatasetField ret = new DatasetField(); + String fieldName = json.getString("typeName", ""); + String typeClass = json.getString("typeClass", ""); + if (!existingTypes.containsKey(fieldName)) { + boolean multiple = json.getBoolean("multiple"); + DatasetFieldType fieldType = new DatasetFieldType(); + fieldType.setName(fieldName); + fieldType.setAllowMultiples(multiple); + fieldType.setAllowControlledVocabulary(typeClass.equals("controlledVocabulary")); + fieldType.setFieldType(FieldType.TEXT); + fieldType.setMetadataBlock(block); + fieldType.setChildDatasetFieldTypes(new ArrayList<>()); + existingTypes.put(fieldName, fieldType); + } + DatasetFieldType type = existingTypes.get(fieldName); + ret.setDatasetFieldType(type); + + if (typeClass.equals("compound")) { + parseCompoundValue(ret, type, json, testType, block, existingTypes); + } else if (type.isControlledVocabulary()) { + parseControlledVocabularyValue(ret, type, json); + } else { + parsePrimitiveValue(ret, type, json); + } + + return ret; + } + + public static void parseCompoundValue(DatasetField dsf, DatasetFieldType compoundType, JsonObject json, + Boolean testType, MetadataBlock block, Map existingTypes) + throws JsonParseException { + List vocabExceptions = new ArrayList<>(); + List vals = new LinkedList<>(); + if (compoundType.isAllowMultiples()) { + int order = 0; + try { + json.getJsonArray("value").getValuesAs(JsonObject.class); + } catch (ClassCastException cce) { + throw new JsonParseException("Invalid values submitted for " + compoundType.getName() + + ". 
It should be an array of values."); + } + for (JsonObject obj : json.getJsonArray("value").getValuesAs(JsonObject.class)) { + DatasetFieldCompoundValue cv = new DatasetFieldCompoundValue(); + List fields = new LinkedList<>(); + for (String fieldName : obj.keySet()) { + JsonObject childFieldJson = obj.getJsonObject(fieldName); + DatasetField f = null; + try { + f = parseField(childFieldJson, testType, block, existingTypes); + } catch (ControlledVocabularyException ex) { + vocabExceptions.add(ex); + } + + if (f != null) { + f.setParentDatasetFieldCompoundValue(cv); + fields.add(f); + } + } + if (!fields.isEmpty()) { + cv.setChildDatasetFields(fields); + cv.setDisplayOrder(order); + vals.add(cv); + } + order++; + } + + } else { + + DatasetFieldCompoundValue cv = new DatasetFieldCompoundValue(); + List fields = new LinkedList<>(); + JsonObject value = json.getJsonObject("value"); + for (String key : value.keySet()) { + JsonObject childFieldJson = value.getJsonObject(key); + DatasetField f = null; + try { + f = parseField(childFieldJson, testType, block, existingTypes); + } catch (ControlledVocabularyException ex) { + vocabExceptions.add(ex); + } + if (f != null) { + f.setParentDatasetFieldCompoundValue(cv); + fields.add(f); + } + } + if (!fields.isEmpty()) { + cv.setChildDatasetFields(fields); + vals.add(cv); + } + + } + if (!vocabExceptions.isEmpty()) { + throw new CompoundVocabularyException("Invalid controlled vocabulary in compound field ", vocabExceptions, + vals); + } + + for (DatasetFieldCompoundValue dsfcv : vals) { + dsfcv.setParentDatasetField(dsf); + } + dsf.setDatasetFieldCompoundValues(vals); + } + + public static void parsePrimitiveValue(DatasetField dsf, DatasetFieldType dft, JsonObject json) + throws JsonParseException { + List vals = new LinkedList<>(); + if (dft.isAllowMultiples()) { + try { + json.getJsonArray("value").getValuesAs(JsonObject.class); + } catch (ClassCastException cce) { + throw new JsonParseException( + "Invalid values submitted for " 
+ dft.getName() + ". It should be an array of values."); + } + for (JsonString val : json.getJsonArray("value").getValuesAs(JsonString.class)) { + DatasetFieldValue datasetFieldValue = new DatasetFieldValue(dsf); + datasetFieldValue.setDisplayOrder(vals.size() - 1); + datasetFieldValue.setValue(val.getString().trim()); + vals.add(datasetFieldValue); + } + + } else { + try { + json.getString("value"); + } catch (ClassCastException cce) { + throw new JsonParseException( + "Invalid value submitted for " + dft.getName() + ". It should be a single value."); + } + DatasetFieldValue datasetFieldValue = new DatasetFieldValue(); + datasetFieldValue.setValue(json.getString("value", "").trim()); + datasetFieldValue.setDatasetField(dsf); + vals.add(datasetFieldValue); + } + + dsf.setDatasetFieldValues(vals); + } + + public static void parseControlledVocabularyValue(DatasetField dsf, DatasetFieldType cvvType, JsonObject json) + throws JsonParseException { + List vals = new LinkedList<>(); + try { + if (cvvType.isAllowMultiples()) { + try { + json.getJsonArray("value").getValuesAs(JsonObject.class); + } catch (ClassCastException cce) { + throw new JsonParseException( + "Invalid values submitted for " + cvvType.getName() + ". It should be an array of values."); + } + for (JsonString strVal : json.getJsonArray("value").getValuesAs(JsonString.class)) { + String strValue = strVal.getString(); + ControlledVocabularyValue cvv = new ControlledVocabularyValue(); + cvv.setDatasetFieldType(cvvType); + cvv.setStrValue(strVal.getString()); + vals.add(cvv); + } + + } else { + try { + json.getString("value"); + } catch (ClassCastException cce) { + throw new JsonParseException( + "Invalid value submitted for " + cvvType.getName() + ". 
It should be a single value."); + } + String strValue = json.getString("value", ""); + ControlledVocabularyValue cvv = new ControlledVocabularyValue(); + cvv.setDatasetFieldType(cvvType); + cvv.setStrValue(strValue); + vals.add(cvv); + } + } catch (ClassCastException cce) { + throw new JsonParseException("Invalid values submitted for " + cvvType.getName()); + } + + dsf.setControlledVocabularyValues(vals); } } diff --git a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java index c3d9fd8fcd3..d772ba2b9da 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/PersonOrOrgUtilTest.java @@ -89,6 +89,8 @@ public void testName() { verifyIsPerson("kcjim11, kcjim11", "kcjim11", "kcjim11"); verifyIsPerson("Bartholomew 3, James", "James", "Bartholomew 3"); + verifyIsPerson("Smith, ", null, "Smith"); + verifyIsPerson("Smith,", null, "Smith"); } private void verifyIsOrganization(String fullName) { @@ -106,7 +108,7 @@ private void verifyIsPerson(String fullName, String givenName, String familyName private void verifyIsPerson(String fullName, String givenName, String familyName, boolean isPerson) { JsonObject obj = PersonOrOrgUtil.getPersonOrOrganization(fullName, false, isPerson); System.out.println(JsonUtil.prettyPrint(obj)); - assertEquals(obj.getString("fullName"),fullName); + assertEquals(obj.getString("fullName"), StringUtil.normalize(fullName)); assertTrue(obj.getBoolean("isPerson")); assertEquals(obj.containsKey("givenName"), givenName != null); if(obj.containsKey("givenName") && givenName != null) { diff --git a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java index 59e175f30c1..d1cb30e2bc3 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java +++ 
b/src/test/java/edu/harvard/iq/dataverse/util/json/JsonParserTest.java @@ -4,18 +4,11 @@ package edu.harvard.iq.dataverse.util.json; -import edu.harvard.iq.dataverse.ControlledVocabularyValue; -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetField; -import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; -import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.*; import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; -import edu.harvard.iq.dataverse.DatasetFieldValue; -import edu.harvard.iq.dataverse.DatasetVersion; -import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseTheme.Alignment; -import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.UserNotification.Type; +import edu.harvard.iq.dataverse.api.dto.DataverseDTO; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.IpGroup; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.IpGroupProvider; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; @@ -50,16 +43,7 @@ import java.io.StringReader; import java.math.BigDecimal; import java.text.ParseException; -import java.util.Arrays; -import java.util.Calendar; -import java.util.Collections; -import java.util.Date; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; -import java.util.Set; -import java.util.TimeZone; +import java.util.*; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.*; @@ -281,6 +265,33 @@ public void testParseCompleteDataverse() throws JsonParseException { throw new JsonParseException("Couldn't read test file", ioe); } } + + /** + * Test that a JSON object passed for a DataverseDTO is correctly parsed. + * This checks that all properties are parsed into the correct DataverseDTO properties. + * @throws JsonParseException when this test is broken. 
+ */ + @Test + public void parseDataverseDTO() throws JsonParseException { + JsonObject dvJson; + try (FileReader reader = new FileReader("doc/sphinx-guides/source/_static/api/dataverse-complete.json")) { + dvJson = Json.createReader(reader).readObject(); + DataverseDTO actual = sut.parseDataverseDTO(dvJson); + List actualDataverseContacts = actual.getDataverseContacts(); + assertEquals("Scientific Research", actual.getName()); + assertEquals("science", actual.getAlias()); + assertEquals("Scientific Research University", actual.getAffiliation()); + assertEquals("We do all the science.", actual.getDescription()); + assertEquals("LABORATORY", actual.getDataverseType().toString()); + assertEquals(2, actualDataverseContacts.size()); + assertEquals("pi@example.edu", actualDataverseContacts.get(0).getContactEmail()); + assertEquals("student@example.edu", actualDataverseContacts.get(1).getContactEmail()); + assertEquals(0, actualDataverseContacts.get(0).getDisplayOrder()); + assertEquals(1, actualDataverseContacts.get(1).getDisplayOrder()); + } catch (IOException ioe) { + throw new JsonParseException("Couldn't read test file", ioe); + } + } @Test public void testParseThemeDataverse() throws JsonParseException { diff --git a/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java b/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java index 3c5b4797b0a..c4ee4547ed7 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/shapefile/ShapefileHandlerTest.java @@ -63,22 +63,22 @@ private File createBlankFile(String filename) throws IOException { } return Files.createFile(tempFolder.resolve(filename)).toFile(); } - + private FileInputStream createZipReturnFilestream(List file_names, String zipfile_name) throws IOException{ - + File zip_file_obj = this.createAndZipFiles(file_names, zipfile_name); if (zip_file_obj == null){ return null; } - + 
FileInputStream file_input_stream = new FileInputStream(zip_file_obj); return file_input_stream; - + } - + /* - Convenience class to create .zip file and return a FileInputStream + Convenience method to create .zip file and return a File @param List file_names - List of filenames to add to .zip. These names will be used to create 0 length files @param String zipfile_name - Name of .zip file to create @@ -98,13 +98,13 @@ private File createAndZipFiles(List file_names, String zipfile_name) thr } Path zip_file_obj = this.tempFolder.resolve(zipfile_name); - ZipOutputStream zip_stream = new ZipOutputStream(new FileOutputStream(zip_file_obj.toFile())); + try (ZipOutputStream zip_stream = new ZipOutputStream(new FileOutputStream(zip_file_obj.toFile()))) { - // Iterate through File objects and add them to the ZipOutputStream - for (File file_obj : fileCollection) { - this.addToZipFile(file_obj.getName(), file_obj, zip_stream); + // Iterate through File objects and add them to the ZipOutputStream + for (File file_obj : fileCollection) { + this.addToZipFile(file_obj.getName(), file_obj, zip_stream); + } } - /* ----------------------------------- Cleanup: Delete single files that were added to .zip ----------------------------------- */ @@ -126,7 +126,7 @@ public void testCreateZippedNonShapefile() throws IOException{ File zipfile_obj = createAndZipFiles(file_names, "not-quite-a-shape.zip"); // Pass the .zip to the ShapefileHandler - ShapefileHandler shp_handler = new ShapefileHandler(new FileInputStream(zipfile_obj)); + ShapefileHandler shp_handler = new ShapefileHandler(zipfile_obj); shp_handler.DEBUG= true; // Contains shapefile? 
@@ -157,7 +157,7 @@ public void testShapefileWithQpjAndQmd() throws IOException { File zipFile = createAndZipFiles(fileNames, "testShapeWithNewExtensions.zip"); // Pass the zip to the ShapefileHandler - ShapefileHandler shpHandler = new ShapefileHandler(new FileInputStream(zipFile)); + ShapefileHandler shpHandler = new ShapefileHandler(zipFile); shpHandler.DEBUG = true; // Check if it is recognized as a shapefile @@ -191,7 +191,7 @@ public void testZippedTwoShapefiles() throws IOException{ File zipfile_obj = createAndZipFiles(file_names, "two-shapes.zip"); // Pass the .zip to the ShapefileHandler - ShapefileHandler shp_handler = new ShapefileHandler(new FileInputStream(zipfile_obj)); + ShapefileHandler shp_handler = new ShapefileHandler(zipfile_obj); shp_handler.DEBUG= true; assertTrue(shp_handler.containsShapefile(), "verify shapefile existance"); @@ -217,7 +217,7 @@ public void testZippedTwoShapefiles() throws IOException{ // Rezip/Reorder the files File test_unzip_folder = Files.createDirectory(this.tempFolder.resolve("test_unzip")).toFile(); //File test_unzip_folder = new File("/Users/rmp553/Desktop/blah"); - shp_handler.rezipShapefileSets(new FileInputStream(zipfile_obj), test_unzip_folder ); + shp_handler.rezipShapefileSets(test_unzip_folder ); // Does the re-ordering do what we wanted? 
@@ -244,7 +244,7 @@ public void testZippedShapefileWithExtraFiles() throws IOException{ File zipfile_obj = createAndZipFiles(file_names, "shape-plus.zip"); // Pass the .zip to the ShapefileHandler - ShapefileHandler shp_handler = new ShapefileHandler(new FileInputStream(zipfile_obj)); + ShapefileHandler shp_handler = new ShapefileHandler(zipfile_obj); shp_handler.DEBUG= true; assertTrue(shp_handler.containsShapefile(), "verify shapefile existance"); @@ -264,7 +264,7 @@ public void testZippedShapefileWithExtraFiles() throws IOException{ File unzip2Folder = Files.createDirectory(this.tempFolder.resolve("test_unzip2")).toFile(); // Rezip/Reorder the files - shp_handler.rezipShapefileSets(new FileInputStream(zipfile_obj), unzip2Folder); + shp_handler.rezipShapefileSets(unzip2Folder); //shp_handler.rezipShapefileSets(new FileInputStream(zipfile_obj), new File("/Users/rmp553/Desktop/blah")); @@ -284,9 +284,9 @@ public void testZippedShapefileWithExtraFiles() throws IOException{ } @Test - public void testHiddenFiles() { + public void testHiddenFiles() throws IOException { // test with shapefiles in hidden directory - ShapefileHandler shp_handler = new ShapefileHandler("src/test/resources/hiddenShapefiles.zip"); + ShapefileHandler shp_handler = new ShapefileHandler(new File("src/test/resources/hiddenShapefiles.zip")); shp_handler.DEBUG= true; assertFalse(shp_handler.containsShapefile()); } diff --git a/src/test/resources/own-cloud-downloads/greetings.zip b/src/test/resources/own-cloud-downloads/greetings.zip new file mode 100644 index 00000000000..6e166d385d1 Binary files /dev/null and b/src/test/resources/own-cloud-downloads/greetings.zip differ diff --git a/src/test/resources/own-cloud-downloads/shapes.zip b/src/test/resources/own-cloud-downloads/shapes.zip new file mode 100644 index 00000000000..99d5f36c895 Binary files /dev/null and b/src/test/resources/own-cloud-downloads/shapes.zip differ