From 4878cfe47a284f029b2d98adb64d02dafdb540b6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:31:06 -0400 Subject: [PATCH 001/105] separate metadata parsing/params from XML generation code --- .../pidproviders/doi/AbstractDOIProvider.java | 25 ++-- .../pidproviders/doi/DoiMetadata.java | 138 ++++++++++++++++++ .../datacite/DOIDataCiteRegisterService.java | 57 ++++---- 3 files changed, 180 insertions(+), 40 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java index 43e34e74c59..02a7dedce47 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/AbstractDOIProvider.java @@ -91,31 +91,30 @@ public String getMetadataFromDvObject(String identifier, Map met } else { dataset = (Dataset) dvObject.getOwner(); } - - XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); - metadataTemplate.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + doiMetadata.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + doiMetadata.setAuthors(dataset.getLatestVersion().getDatasetAuthors()); if (dvObject.isInstanceofDataset()) { - metadataTemplate.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); + doiMetadata.setDescription(dataset.getLatestVersion().getDescriptionPlainText()); } if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; String fileDescription = df.getDescription(); - metadataTemplate.setDescription(fileDescription == null ? "" : fileDescription); + doiMetadata.setDescription(fileDescription == null ? "" : fileDescription); } - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); - metadataTemplate.setTitle(dvObject.getCurrentName()); + doiMetadata.setContacts(dataset.getLatestVersion().getDatasetContacts()); + doiMetadata.setProducers(dataset.getLatestVersion().getDatasetProducers()); + doiMetadata.setTitle(dvObject.getCurrentName()); String producerString = pidProviderService.getProducer(); if (producerString.isEmpty() || producerString.equals(DatasetField.NA_VALUE)) { producerString = UNAVAILABLE; } - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + doiMetadata.setPublisher(producerString); + doiMetadata.setPublisherYear(metadata.get("datacite.publicationyear")); - String xmlMetadata = metadataTemplate.generateXML(dvObject); + String xmlMetadata = new XmlMetadataTemplate(doiMetadata).generateXML(dvObject); logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); return xmlMetadata; } diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java new file mode 100644 index 00000000000..ffd24747bc2 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/DoiMetadata.java @@ -0,0 +1,138 @@ +package edu.harvard.iq.dataverse.pidproviders.doi; + +import java.util.ArrayList; +import java.util.List; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import edu.harvard.iq.dataverse.DatasetAuthor; + + +//Parses some specific parts of a DataCite XML metadata file +public class DoiMetadata { + + private String identifier; + private List creators; + private String title; + private String publisher; + private String publisherYear; + private List datafileIdentifiers; + private List authors; + private String description; + private List contacts; + private List producers; + + + public DoiMetadata() { + } + + public void parseDataCiteXML(String xmlMetaData) { + Document doc = Jsoup.parseBodyFragment(xmlMetaData); + Elements identifierElements = doc.select("identifier"); + if (identifierElements.size() > 0) { + identifier = identifierElements.get(0).html(); + } + Elements creatorElements = doc.select("creatorName"); + creators = new ArrayList<>(); + for (Element creatorElement : creatorElements) { + creators.add(creatorElement.html()); + } + Elements titleElements = doc.select("title"); + if (titleElements.size() > 0) { + title = titleElements.get(0).html(); + } + Elements publisherElements = doc.select("publisher"); + if (publisherElements.size() > 0) { + publisher = publisherElements.get(0).html(); + } + Elements publisherYearElements = doc.select("publicationYear"); + if (publisherYearElements.size() > 0) { + publisherYear = publisherYearElements.get(0).html(); + } + } + + public String getIdentifier() { + return identifier; + } + + public void setIdentifier(String identifier) { + this.identifier = identifier; + } + + public List getCreators() { + return creators; + } + + public void setCreators(List creators) { + this.creators = creators; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getPublisher() { + return publisher; + } + + public void setPublisher(String publisher) { + this.publisher = publisher; + } + + public String getPublisherYear() { + return publisherYear; + } + + public void setPublisherYear(String publisherYear) { + this.publisherYear = publisherYear; + } + + + public List getProducers() { + return producers; + } + + public void setProducers(List producers) { + this.producers = producers; + } + + public List getContacts() { + return contacts; + } + + public void setContacts(List contacts) { + this.contacts = contacts; + } + + public String getDescription() { + return description; + } + + public void setDescription(String description) { + this.description = description; + } + + public List getAuthors() { + return authors; + } + + public void setAuthors(List authors) { + this.authors = authors; + } + + + public List getDatafileIdentifiers() { + return datafileIdentifiers; + } + + public void setDatafileIdentifiers(List datafileIdentifiers) { + this.datafileIdentifiers = datafileIdentifiers; + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java index 0e322eace05..bc69275ac1d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/DOIDataCiteRegisterService.java @@ -21,6 +21,7 @@ import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.branding.BrandingUtil; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.DoiMetadata; import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; /** @@ -90,28 +91,28 @@ public static String getMetadataFromDvObject(String identifier, Map from HTML, it leaves '&' (at least so we need to xml escape as well String description = StringEscapeUtils.escapeXml10(dataset.getLatestVersion().getDescriptionPlainText()); if (description.isEmpty() || description.equals(DatasetField.NA_VALUE)) { description = AbstractPidProvider.UNAVAILABLE; } - metadataTemplate.setDescription(description); + doiMetadata.setDescription(description); } if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; //Note: File metadata is not escaped like dataset metadata is, so adding an xml escape here. //This could/should be removed if the datafile methods add escaping String fileDescription = StringEscapeUtils.escapeXml10(df.getDescription()); - metadataTemplate.setDescription(fileDescription == null ? AbstractPidProvider.UNAVAILABLE : fileDescription); + doiMetadata.setDescription(fileDescription == null ? AbstractPidProvider.UNAVAILABLE : fileDescription); } - metadataTemplate.setContacts(dataset.getLatestVersion().getDatasetContacts()); - metadataTemplate.setProducers(dataset.getLatestVersion().getDatasetProducers()); + doiMetadata.setContacts(dataset.getLatestVersion().getDatasetContacts()); + doiMetadata.setProducers(dataset.getLatestVersion().getDatasetProducers()); String title = dvObject.getCurrentName(); if(dvObject.isInstanceofDataFile()) { //Note file title is not currently escaped the way the dataset title is, so adding it here. @@ -122,40 +123,41 @@ public static String getMetadataFromDvObject(String identifier, Map metadata, DvObject dvObject) { - XmlMetadataTemplate metadataTemplate = new XmlMetadataTemplate(); - metadataTemplate.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); - metadataTemplate.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); + DoiMetadata doiMetadata = new DoiMetadata(); + + doiMetadata.setIdentifier(identifier.substring(identifier.indexOf(':') + 1)); + doiMetadata.setCreators(Arrays.asList(metadata.get("datacite.creator").split("; "))); - metadataTemplate.setDescription(AbstractPidProvider.UNAVAILABLE); + doiMetadata.setDescription(AbstractPidProvider.UNAVAILABLE); String title =metadata.get("datacite.title"); System.out.print("Map metadata title: "+ metadata.get("datacite.title")); - metadataTemplate.setAuthors(null); + doiMetadata.setAuthors(null); - metadataTemplate.setTitle(title); + doiMetadata.setTitle(title); String producerString = AbstractPidProvider.UNAVAILABLE; - metadataTemplate.setPublisher(producerString); - metadataTemplate.setPublisherYear(metadata.get("datacite.publicationyear")); + doiMetadata.setPublisher(producerString); + doiMetadata.setPublisherYear(metadata.get("datacite.publicationyear")); - String xmlMetadata = metadataTemplate.generateXML(dvObject); + String xmlMetadata = new XmlMetadataTemplate(doiMetadata).generateXML(dvObject); logger.log(Level.FINE, "XML to send to DataCite: {0}", xmlMetadata); return xmlMetadata; } @@ -209,11 +211,12 @@ Map getMetadata(String identifier) throws IOException { Map metadata = new HashMap<>(); try { String xmlMetadata = client.getMetadata(identifier.substring(identifier.indexOf(":") + 1)); - XmlMetadataTemplate template = new XmlMetadataTemplate(xmlMetadata); - metadata.put("datacite.creator", String.join("; ", template.getCreators())); - metadata.put("datacite.title", template.getTitle()); - metadata.put("datacite.publisher", template.getPublisher()); - metadata.put("datacite.publicationyear", template.getPublisherYear()); + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.parseDataCiteXML(xmlMetadata); + metadata.put("datacite.creator", String.join("; ", doiMetadata.getCreators())); + metadata.put("datacite.title", doiMetadata.getTitle()); + metadata.put("datacite.publisher", doiMetadata.getPublisher()); + metadata.put("datacite.publicationyear", doiMetadata.getPublisherYear()); } catch (RuntimeException e) { logger.log(Level.INFO, identifier, e); } From 68792c2f92c90f716f39caaa5f76b652592186c0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:31:37 -0400 Subject: [PATCH 002/105] extract some common xml writing util code --- .../dataverse/export/ddi/DdiExportUtil.java | 486 ++++++------------ .../iq/dataverse/util/xml/XmlWriterUtil.java | 174 +++++++ 2 files changed, 340 insertions(+), 320 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 9a689f7a4ed..0c861cb6c09 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -24,6 +24,8 @@ import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.xml.XmlPrinter; +import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -111,9 +113,9 @@ private static void dtoddi(DatasetDTO datasetDto, OutputStream outputStream) thr xmlw.writeDefaultNamespace("ddi:codebook:2_5"); xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION); - writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION); + xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION); if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { - writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage()); + xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage()); } createStdyDscr(xmlw, datasetDto); createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles()); @@ -133,9 +135,9 @@ public static void datasetJson2ddi(JsonObject datasetDtoAsJson, JsonArray fileDe xmlw.writeDefaultNamespace("ddi:codebook:2_5"); xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", DDIExporter.DEFAULT_XML_NAMESPACE + " " + DDIExporter.DEFAULT_XML_SCHEMALOCATION); - writeAttribute(xmlw, "version", DDIExporter.DEFAULT_XML_VERSION); + xmlw.writeAttribute("version", DDIExporter.DEFAULT_XML_VERSION); if(DvObjectContainer.isMetadataLanguageSet(datasetDto.getMetadataLanguage())) { - writeAttribute(xmlw, "xml:lang", datasetDto.getMetadataLanguage()); + xmlw.writeAttribute("xml:lang", datasetDto.getMetadataLanguage()); } createStdyDscr(xmlw, datasetDto); createFileDscr(xmlw, fileDetails); @@ -186,15 +188,15 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); - writeFullElement(xmlw, "subTitl", dto2Primitive(version, DatasetFieldConstant.subTitle)); + XmlWriterUtil.writeFullElement(xmlw, "titl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); + XmlWriterUtil.writeFullElement(xmlw, "subTitl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.subTitle)); FieldDTO altField = dto2FieldDTO( version, DatasetFieldConstant.alternativeTitle, "citation" ); if (altField != null) { writeMultipleElement(xmlw, "altTitl", altField, datasetDto.getMetadataLanguage()); } xmlw.writeStartElement("IDNo"); - writeAttribute(xmlw, "agency", persistentAgency); + XmlWriterUtil.writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(pid); @@ -218,23 +220,23 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) boolean excludeRepository = settingsService.isTrueForKey(SettingsServiceBean.Key.ExportInstallationAsDistributorOnlyWhenNotSet, false); if (!StringUtils.isEmpty(datasetDto.getPublisher()) && !(excludeRepository && distributorSet)) { xmlw.writeStartElement("distrbtr"); - writeAttribute(xmlw, "source", "archive"); + xmlw.writeAttribute("source", "archive"); xmlw.writeCharacters(datasetDto.getPublisher()); xmlw.writeEndElement(); //distrbtr } writeDistributorsElement(xmlw, version, datasetDto.getMetadataLanguage()); writeContactsElement(xmlw, version); /* per SCHEMA, depositr comes before depDate! - L.A. */ - writeFullElement(xmlw, "depositr", dto2Primitive(version, DatasetFieldConstant.depositor)); + XmlWriterUtil.writeFullElement(xmlw, "depositr", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.depositor)); /* ... and depDate comes before distDate - L.A. */ - writeFullElement(xmlw, "depDate", dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); - writeFullElement(xmlw, "distDate", dto2Primitive(version, DatasetFieldConstant.distributionDate)); + XmlWriterUtil.writeFullElement(xmlw, "depDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); + XmlWriterUtil.writeFullElement(xmlw, "distDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.distributionDate)); xmlw.writeEndElement(); // diststmt writeSeriesElement(xmlw, version); xmlw.writeStartElement("holdings"); - writeAttribute(xmlw, "URI", pidUri); + XmlWriterUtil.writeAttribute(xmlw, "URI", pidUri); xmlw.writeEndElement(); //holdings xmlw.writeEndElement(); // citation @@ -247,7 +249,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeSubjectElement(xmlw, version, datasetDto.getMetadataLanguage()); //Subject and Keywords writeAbstractElement(xmlw, version, datasetDto.getMetadataLanguage()); // Description writeSummaryDescriptionElement(xmlw, version, datasetDto.getMetadataLanguage()); - writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.notesText)); + XmlWriterUtil.writeFullElement(xmlw, "notes", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.notesText)); //////// xmlw.writeEndElement(); // stdyInfo @@ -255,7 +257,7 @@ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) writeDataAccess(xmlw , version); writeOtherStudyMaterial(xmlw , version); - writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); + XmlWriterUtil.writeFullElement(xmlw, "notes", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); xmlw.writeEndElement(); // stdyDscr @@ -274,10 +276,10 @@ private static void writeOtherStudyMaterial(XMLStreamWriter xmlw , DatasetVersio return; } xmlw.writeStartElement("othrStdyMat"); - writeFullElementList(xmlw, "relMat", relMaterials); - writeFullElementList(xmlw, "relStdy", relDatasets); + XmlWriterUtil.writeFullElementList(xmlw, "relMat", relMaterials); + XmlWriterUtil.writeFullElementList(xmlw, "relStdy", relDatasets); writeRelPublElement(xmlw, version); - writeFullElementList(xmlw, "othRefs", relReferences); + XmlWriterUtil.writeFullElementList(xmlw, "othRefs", relReferences); xmlw.writeEndElement(); //othrStdyMat } @@ -292,29 +294,29 @@ private static void writeDataAccess(XMLStreamWriter xmlw , DatasetVersionDTO ver xmlw.writeStartElement("dataAccs"); xmlw.writeStartElement("setAvail"); - writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); - writeFullElement(xmlw, "origArch", version.getOriginalArchive()); - writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); - writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); - writeFullElement(xmlw, "complete", version.getStudyCompletion()); + XmlWriterUtil.writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); + XmlWriterUtil.writeFullElement(xmlw, "origArch", version.getOriginalArchive()); + XmlWriterUtil.writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); + XmlWriterUtil.writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); + XmlWriterUtil.writeFullElement(xmlw, "complete", version.getStudyCompletion()); xmlw.writeEndElement(); //setAvail xmlw.writeStartElement("useStmt"); - writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); - writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); - writeFullElement(xmlw, "restrctn", version.getRestrictions()); - writeFullElement(xmlw, "contact", version.getContactForAccess()); - writeFullElement(xmlw, "citReq", version.getCitationRequirements()); - writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); - writeFullElement(xmlw, "conditions", version.getConditions()); - writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); + XmlWriterUtil.writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); + XmlWriterUtil.writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); + XmlWriterUtil.writeFullElement(xmlw, "restrctn", version.getRestrictions()); + XmlWriterUtil.writeFullElement(xmlw, "contact", version.getContactForAccess()); + XmlWriterUtil.writeFullElement(xmlw, "citReq", version.getCitationRequirements()); + XmlWriterUtil.writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); + XmlWriterUtil.writeFullElement(xmlw, "conditions", version.getConditions()); + XmlWriterUtil.writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); xmlw.writeEndElement(); //useStmt /* any s: */ if (version.getTermsOfAccess() != null && !version.getTermsOfAccess().trim().equals("")) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "type", NOTE_TYPE_TERMS_OF_ACCESS); - writeAttribute(xmlw, "level", LEVEL_DV); + xmlw.writeAttribute("type", NOTE_TYPE_TERMS_OF_ACCESS); + xmlw.writeAttribute("level", LEVEL_DV); xmlw.writeCharacters(version.getTermsOfAccess()); xmlw.writeEndElement(); //notes } @@ -341,9 +343,9 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase xmlw.writeStartElement("docDscr"); xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); + XmlWriterUtil.writeFullElement(xmlw, "titl", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.title), datasetDto.getMetadataLanguage()); xmlw.writeStartElement("IDNo"); - writeAttribute(xmlw, "agency", persistentAgency); + XmlWriterUtil.writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(persistentProtocol + ":" + persistentAuthority + "/" + persistentId); xmlw.writeEndElement(); // IDNo xmlw.writeEndElement(); // titlStmt @@ -351,11 +353,11 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase //The doc is always published by the Dataverse Repository if (!StringUtils.isEmpty(datasetDto.getPublisher())) { xmlw.writeStartElement("distrbtr"); - writeAttribute(xmlw, "source", "archive"); + xmlw.writeAttribute("source", "archive"); xmlw.writeCharacters(datasetDto.getPublisher()); xmlw.writeEndElement(); // distrbtr } - writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); + XmlWriterUtil.writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); xmlw.writeEndElement(); // diststmt writeVersionStatement(xmlw, version); @@ -369,10 +371,10 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase private static void writeVersionStatement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException{ xmlw.writeStartElement("verStmt"); - writeAttribute(xmlw,"source","archive"); + XmlWriterUtil.writeAttribute(xmlw,"source","archive"); xmlw.writeStartElement("version"); - writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); - writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); + XmlWriterUtil.writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); + XmlWriterUtil.writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); xmlw.writeCharacters(datasetVersionDTO.getVersionNumber().toString()); xmlw.writeEndElement(); // version xmlw.writeEndElement(); // verStmt @@ -523,14 +525,14 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset * "" entries, then all the "" ones: */ for (String nationEntry : nationList) { - writeFullElement(xmlw, "nation", nationEntry); + XmlWriterUtil.writeFullElement(xmlw, "nation", nationEntry); } for (String geogCoverEntry : geogCoverList) { - writeFullElement(xmlw, "geogCover", geogCoverEntry); + XmlWriterUtil.writeFullElement(xmlw, "geogCover", geogCoverEntry); } } - writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); + XmlWriterUtil.writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); /* Only 1 geoBndBox is allowed in the DDI. So, I'm just going to arbitrarily use the first one, and ignore the rest! -L.A. */ @@ -563,16 +565,16 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset */ if (geoBndBoxMap.get("westBL") != null) { - writeFullElement(xmlw, "westBL", geoBndBoxMap.get("westBL")); + XmlWriterUtil.writeFullElement(xmlw, "westBL", geoBndBoxMap.get("westBL")); } if (geoBndBoxMap.get("eastBL") != null) { - writeFullElement(xmlw, "eastBL", geoBndBoxMap.get("eastBL")); + XmlWriterUtil.writeFullElement(xmlw, "eastBL", geoBndBoxMap.get("eastBL")); } if (geoBndBoxMap.get("southBL") != null) { - writeFullElement(xmlw, "southBL", geoBndBoxMap.get("southBL")); + XmlWriterUtil.writeFullElement(xmlw, "southBL", geoBndBoxMap.get("southBL")); } if (geoBndBoxMap.get("northBL") != null) { - writeFullElement(xmlw, "northBL", geoBndBoxMap.get("northBL")); + XmlWriterUtil.writeFullElement(xmlw, "northBL", geoBndBoxMap.get("northBL")); } xmlw.writeEndElement(); @@ -580,7 +582,7 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset /* analyUnit: */ if (unitOfAnalysisDTO != null) { - writeI18NElementList(xmlw, "anlyUnit", unitOfAnalysisDTO.getMultipleVocab(), "unitOfAnalysis", unitOfAnalysisDTO.getTypeClass(), "socialscience", lang); + XmlWriterUtil.writeI18NElementList(xmlw, "anlyUnit", unitOfAnalysisDTO.getMultipleVocab(), "unitOfAnalysis", unitOfAnalysisDTO.getTypeClass(), "socialscience", lang); } @@ -600,16 +602,16 @@ private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, Dataset private static void writeMultipleElement(XMLStreamWriter xmlw, String element, FieldDTO fieldDTO, String lang) throws XMLStreamException { for (String value : fieldDTO.getMultiplePrimitive()) { //Write multiple lang vals for controlled vocab, otherwise don't include any lang tag - writeFullElement(xmlw, element, value, fieldDTO.isControlledVocabularyField() ? lang : null); + XmlWriterUtil.writeFullElement(xmlw, element, value, fieldDTO.isControlledVocabularyField() ? lang : null); } } private static void writeDateElement(XMLStreamWriter xmlw, String element, String cycle, String event, String dateIn) throws XMLStreamException { xmlw.writeStartElement(element); - writeAttribute(xmlw, "cycle", cycle); - writeAttribute(xmlw, "event", event); - writeAttribute(xmlw, "date", dateIn); + XmlWriterUtil.writeAttribute(xmlw, "cycle", cycle); + XmlWriterUtil.writeAttribute(xmlw, "event", event); + XmlWriterUtil.writeAttribute(xmlw, "date", dateIn); xmlw.writeCharacters(dateIn); xmlw.writeEndElement(); @@ -641,15 +643,15 @@ private static void writeDateElement(XMLStreamWriter xmlw, String element, Strin private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO version, String lang) throws XMLStreamException{ xmlw.writeStartElement("method"); xmlw.writeStartElement("dataColl"); - writeI18NElement(xmlw, "timeMeth", version, DatasetFieldConstant.timeMethod,lang); - writeI18NElement(xmlw, "dataCollector", version, DatasetFieldConstant.dataCollector, lang); - writeI18NElement(xmlw, "collectorTraining", version, DatasetFieldConstant.collectorTraining, lang); - writeI18NElement(xmlw, "frequenc", version, DatasetFieldConstant.frequencyOfDataCollection, lang); - writeI18NElement(xmlw, "sampProc", version, DatasetFieldConstant.samplingProcedure, lang); + XmlWriterUtil.writeI18NElement(xmlw, "timeMeth", version, DatasetFieldConstant.timeMethod,lang); + XmlWriterUtil.writeI18NElement(xmlw, "dataCollector", version, DatasetFieldConstant.dataCollector, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collectorTraining", version, DatasetFieldConstant.collectorTraining, lang); + XmlWriterUtil.writeI18NElement(xmlw, "frequenc", version, DatasetFieldConstant.frequencyOfDataCollection, lang); + XmlWriterUtil.writeI18NElement(xmlw, "sampProc", version, DatasetFieldConstant.samplingProcedure, lang); writeTargetSampleElement(xmlw, version); - writeI18NElement(xmlw, "deviat", version, DatasetFieldConstant.deviationsFromSampleDesign, lang); + XmlWriterUtil.writeI18NElement(xmlw, "deviat", version, DatasetFieldConstant.deviationsFromSampleDesign, lang); /* comes before : */ FieldDTO collModeFieldDTO = dto2FieldDTO(version, DatasetFieldConstant.collectionMode, "socialscience"); @@ -658,37 +660,37 @@ private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO // Below is a backward compatibility check allowing export to work in // an instance where the metadata block has not been updated yet. if (collModeFieldDTO.getMultiple()) { - writeI18NElementList(xmlw, "collMode", collModeFieldDTO.getMultipleVocab(), DatasetFieldConstant.collectionMode, collModeFieldDTO.getTypeClass(), "socialscience", lang); + XmlWriterUtil.writeI18NElementList(xmlw, "collMode", collModeFieldDTO.getMultipleVocab(), DatasetFieldConstant.collectionMode, collModeFieldDTO.getTypeClass(), "socialscience", lang); } else { - writeI18NElement(xmlw, "collMode", version, DatasetFieldConstant.collectionMode, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collMode", version, DatasetFieldConstant.collectionMode, lang); } } /* and so does : */ - writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); + XmlWriterUtil.writeI18NElement(xmlw, "resInstru", version, DatasetFieldConstant.researchInstrument, lang); xmlw.writeStartElement("sources"); - writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); - writeI18NElement(xmlw, "srcOrig", version, DatasetFieldConstant.originOfSources, lang); - writeI18NElement(xmlw, "srcChar", version, DatasetFieldConstant.characteristicOfSources, lang); - writeI18NElement(xmlw, "srcDocu", version, DatasetFieldConstant.accessToSources, lang); + XmlWriterUtil.writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); + XmlWriterUtil.writeI18NElement(xmlw, "srcOrig", version, DatasetFieldConstant.originOfSources, lang); + XmlWriterUtil.writeI18NElement(xmlw, "srcChar", version, DatasetFieldConstant.characteristicOfSources, lang); + XmlWriterUtil.writeI18NElement(xmlw, "srcDocu", version, DatasetFieldConstant.accessToSources, lang); xmlw.writeEndElement(); //sources - writeI18NElement(xmlw, "collSitu", version, DatasetFieldConstant.dataCollectionSituation, lang); - writeI18NElement(xmlw, "actMin", version, DatasetFieldConstant.actionsToMinimizeLoss, lang); + XmlWriterUtil.writeI18NElement(xmlw, "collSitu", version, DatasetFieldConstant.dataCollectionSituation, lang); + XmlWriterUtil.writeI18NElement(xmlw, "actMin", version, DatasetFieldConstant.actionsToMinimizeLoss, lang); /* "" has the uppercase C: */ - writeI18NElement(xmlw, "ConOps", version, DatasetFieldConstant.controlOperations, lang); - writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); - writeI18NElement(xmlw, "cleanOps", version, DatasetFieldConstant.cleaningOperations, lang); + XmlWriterUtil.writeI18NElement(xmlw, "ConOps", version, DatasetFieldConstant.controlOperations, lang); + XmlWriterUtil.writeI18NElement(xmlw, "weight", version, DatasetFieldConstant.weighting, lang); + XmlWriterUtil.writeI18NElement(xmlw, "cleanOps", version, DatasetFieldConstant.cleaningOperations, lang); xmlw.writeEndElement(); //dataColl /* before : */ writeNotesElement(xmlw, version); xmlw.writeStartElement("anlyInfo"); - //writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); - writeI18NElement(xmlw, "respRate", version, DatasetFieldConstant.responseRate, lang); - writeI18NElement(xmlw, "EstSmpErr", version, DatasetFieldConstant.samplingErrorEstimates, lang); - writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); + //XmlWriterUtil.writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); + XmlWriterUtil.writeI18NElement(xmlw, "respRate", version, DatasetFieldConstant.responseRate, lang); + XmlWriterUtil.writeI18NElement(xmlw, "EstSmpErr", version, DatasetFieldConstant.samplingErrorEstimates, lang); + XmlWriterUtil.writeI18NElement(xmlw, "dataAppr", version, DatasetFieldConstant.otherDataAppraisal, lang); xmlw.writeEndElement(); //anlyInfo xmlw.writeEndElement();//method @@ -705,7 +707,7 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO if (CITATION_BLOCK_NAME.equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.subject.equals(fieldDTO.getTypeName())) { - writeI18NElementList(xmlw, "keyword", fieldDTO.getMultipleVocab(), "subject", + XmlWriterUtil.writeI18NElementList(xmlw, "keyword", fieldDTO.getMultipleVocab(), "subject", fieldDTO.getTypeClass(), "citation", lang); } @@ -732,14 +734,10 @@ private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!keywordValue.isEmpty()) { xmlw.writeStartElement("keyword"); - if (!keywordVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", keywordVocab); - } - if (!keywordURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", keywordURI); - } + XmlWriterUtil.writeAttribute(xmlw, "vocab", keywordVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", keywordURI); if (lang != null && isCVV) { - writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); xmlw.writeCharacters(ControlledVocabularyValue.getLocaleStrValue(keywordValue, DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, defaultLocale, true)); @@ -753,13 +751,9 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), false); if (translatedValue != null) { xmlw.writeStartElement("keyword"); - if (!keywordVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", keywordVocab); - } - if (!keywordURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", keywordURI); - } - writeAttribute(xmlw, "xml:lang", lang); + XmlWriterUtil.writeAttribute(xmlw, "vocab", keywordVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", keywordURI); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", lang); xmlw.writeCharacters(translatedValue); xmlw.writeEndElement(); // Keyword } @@ -792,14 +786,10 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), } if (!topicClassificationValue.isEmpty()) { xmlw.writeStartElement("topcClas"); - if (!topicClassificationVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", topicClassificationVocab); - } - if (!topicClassificationURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", topicClassificationURI); - } + XmlWriterUtil.writeAttribute(xmlw, "vocab", topicClassificationVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", topicClassificationURI); if (lang != null && isCVV) { - writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", defaultLocale.getLanguage()); xmlw.writeCharacters(ControlledVocabularyValue.getLocaleStrValue( topicClassificationValue, DatasetFieldConstant.topicClassValue, CITATION_BLOCK_NAME, defaultLocale, true)); @@ -813,13 +803,9 @@ DatasetFieldConstant.keywordValue, CITATION_BLOCK_NAME, new Locale(lang), CITATION_BLOCK_NAME, new Locale(lang), false); if (translatedValue != null) { xmlw.writeStartElement("topcClas"); - if (!topicClassificationVocab.isEmpty()) { - writeAttribute(xmlw, "vocab", topicClassificationVocab); - } - if (!topicClassificationURI.isEmpty()) { - writeAttribute(xmlw, "vocabURI", topicClassificationURI); - } - writeAttribute(xmlw, "xml:lang", lang); + XmlWriterUtil.writeAttribute(xmlw, "vocab", topicClassificationVocab); + XmlWriterUtil.writeAttribute(xmlw, "vocabURI", topicClassificationURI); + XmlWriterUtil.writeAttribute(xmlw, "xml:lang", lang); xmlw.writeCharacters(translatedValue); xmlw.writeEndElement(); // topcClas } @@ -857,7 +843,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!authorName.isEmpty()){ xmlw.writeStartElement("AuthEnty"); if(!authorAffiliation.isEmpty()){ - writeAttribute(xmlw,"affiliation",authorAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); } xmlw.writeCharacters(authorName); xmlw.writeEndElement(); //AuthEnty @@ -880,7 +866,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!contributorName.isEmpty()){ xmlw.writeStartElement("othId"); if(!contributorType.isEmpty()){ - writeAttribute(xmlw,"role", contributorType); + XmlWriterUtil.writeAttribute(xmlw,"role", contributorType); } xmlw.writeCharacters(contributorName); xmlw.writeEndElement(); //othId @@ -922,10 +908,10 @@ private static void writeContactsElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!datasetContactName.isEmpty()){ xmlw.writeStartElement("contact"); if(!datasetContactAffiliation.isEmpty()){ - writeAttribute(xmlw,"affiliation",datasetContactAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); } if(!datasetContactEmail.isEmpty()){ - writeAttribute(xmlw,"email",datasetContactEmail); + XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); } xmlw.writeCharacters(datasetContactName); xmlw.writeEndElement(); //AuthEnty @@ -969,14 +955,10 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } if (!producerName.isEmpty()) { xmlw.writeStartElement("producer"); - if (!producerAffiliation.isEmpty()) { - writeAttribute(xmlw, "affiliation", producerAffiliation); - } - if (!producerAbbreviation.isEmpty()) { - writeAttribute(xmlw, "abbr", producerAbbreviation); - } + XmlWriterUtil.writeAttribute(xmlw, "affiliation", producerAffiliation); + XmlWriterUtil.writeAttribute(xmlw, "abbr", producerAbbreviation); /*if (!producerLogo.isEmpty()) { - writeAttribute(xmlw, "role", producerLogo); + XmlWriterUtil.writeAttribute(xmlw, "role", producerLogo); }*/ xmlw.writeCharacters(producerName); xmlw.writeEndElement(); //AuthEnty @@ -987,7 +969,7 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT } } } - writeFullElement(xmlw, "prodDate", dto2Primitive(version, DatasetFieldConstant.productionDate)); + XmlWriterUtil.writeFullElement(xmlw, "prodDate", XmlWriterUtil.dto2Primitive(version, DatasetFieldConstant.productionDate)); // productionPlace was made multiple as of 5.14: // (a quick backward compatibility check was added to dto2PrimitiveList(), // see the method for details) @@ -1033,17 +1015,11 @@ private static void writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersio if (!distributorName.isEmpty()) { xmlw.writeStartElement("distrbtr"); if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); - } - if (!distributorAffiliation.isEmpty()) { - writeAttribute(xmlw, "affiliation", distributorAffiliation); - } - if (!distributorAbbreviation.isEmpty()) { - writeAttribute(xmlw, "abbr", distributorAbbreviation); - } - if (!distributorURL.isEmpty()) { - writeAttribute(xmlw, "URI", distributorURL); + xmlw.writeAttribute("xml:lang", lang); } + XmlWriterUtil.writeAttribute(xmlw, "affiliation", distributorAffiliation); + XmlWriterUtil.writeAttribute(xmlw, "abbr", distributorAbbreviation); + XmlWriterUtil.writeAttribute(xmlw, "URI", distributorURL); xmlw.writeCharacters(distributorName); xmlw.writeEndElement(); //AuthEnty } @@ -1102,7 +1078,7 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO (In other words - titlStmt is mandatory! -L.A.) */ xmlw.writeStartElement("titlStmt"); - writeFullElement(xmlw, "titl", citation); + XmlWriterUtil.writeFullElement(xmlw, "titl", citation); if (IDNo != null && !IDNo.trim().equals("")) { xmlw.writeStartElement("IDNo"); @@ -1115,7 +1091,7 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO xmlw.writeEndElement(); // titlStmt - writeFullElement(xmlw,"biblCit",citation); + XmlWriterUtil.writeFullElement(xmlw,"biblCit",citation); xmlw.writeEndElement(); //citation if (url != null && !url.trim().equals("") ) { xmlw.writeStartElement("ExtLink"); @@ -1164,10 +1140,10 @@ private static void writeAbstractElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!descriptionText.isEmpty()){ xmlw.writeStartElement("abstract"); if(!descriptionDate.isEmpty()){ - writeAttribute(xmlw,"date",descriptionDate); + XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); } if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); + xmlw.writeAttribute("xml:lang", lang); } xmlw.writeCharacters(descriptionText); xmlw.writeEndElement(); //abstract @@ -1201,7 +1177,7 @@ private static void writeGrantElement(XMLStreamWriter xmlw, DatasetVersionDTO da if (!grantNumber.isEmpty()){ xmlw.writeStartElement("grantNo"); if(!grantAgency.isEmpty()){ - writeAttribute(xmlw,"agency",grantAgency); + XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); } xmlw.writeCharacters(grantNumber); xmlw.writeEndElement(); //grantno @@ -1235,7 +1211,7 @@ private static void writeOtherIdElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!otherId.isEmpty()){ xmlw.writeStartElement("IDNo"); if(!otherIdAgency.isEmpty()){ - writeAttribute(xmlw,"agency",otherIdAgency); + XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); } xmlw.writeCharacters(otherId); xmlw.writeEndElement(); //IDNo @@ -1269,7 +1245,7 @@ private static void writeSoftwareElement(XMLStreamWriter xmlw, DatasetVersionDTO if (!softwareName.isEmpty()){ xmlw.writeStartElement("software"); if(!softwareVersion.isEmpty()){ - writeAttribute(xmlw,"version",softwareVersion); + XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); } xmlw.writeCharacters(softwareName); xmlw.writeEndElement(); //software @@ -1384,10 +1360,10 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da if (!notesText.isEmpty()) { xmlw.writeStartElement("notes"); if(!notesType.isEmpty()){ - writeAttribute(xmlw,"type",notesType); + XmlWriterUtil.writeAttribute(xmlw,"type",notesType); } if(!notesSubject.isEmpty()){ - writeAttribute(xmlw,"subject",notesSubject); + XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); } xmlw.writeCharacters(notesText); xmlw.writeEndElement(); @@ -1412,14 +1388,14 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos // and observations, etc.) if (fileDTo.getDataFile().getDataTables() == null || fileDTo.getDataFile().getDataTables().isEmpty()) { xmlw.writeStartElement("otherMat"); - writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId()); + XmlWriterUtil.writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId()); String pidURL = fileDTo.getDataFile().getPidURL(); if (pidURL != null && !pidURL.isEmpty()){ - writeAttribute(xmlw, "URI", pidURL); + xmlw.writeAttribute("URI", pidURL); } else { - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileDTo.getDataFile().getId()); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileDTo.getDataFile().getId()); } - writeAttribute(xmlw, "level", "datafile"); + xmlw.writeAttribute("level", "datafile"); xmlw.writeStartElement("labl"); xmlw.writeCharacters(fileDTo.getDataFile().getFilename()); xmlw.writeEndElement(); // labl @@ -1430,9 +1406,9 @@ private static void createOtherMats(XMLStreamWriter xmlw, List fileDtos String contentType = fileDTo.getDataFile().getContentType(); if (!StringUtilisEmpty(contentType)) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_CONTENTTYPE); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_CONTENTTYPE); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_CONTENTTYPE); + xmlw.writeAttribute("subject", NOTE_SUBJECT_CONTENTTYPE); xmlw.writeCharacters(contentType); xmlw.writeEndElement(); // notes } @@ -1460,14 +1436,14 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA // and observations, etc.) if (!fileJson.containsKey("dataTables")) { xmlw.writeStartElement("otherMat"); - writeAttribute(xmlw, "ID", "f" + fileJson.getJsonNumber(("id").toString())); + xmlw.writeAttribute("ID", "f" + fileJson.getJsonNumber(("id").toString())); if (fileJson.containsKey("pidUrl")){ - writeAttribute(xmlw, "URI", fileJson.getString("pidUrl")); + XmlWriterUtil.writeAttribute(xmlw, "URI", fileJson.getString("pidUrl")); } else { - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileJson.getJsonNumber("id").toString()); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileJson.getJsonNumber("id").toString()); } - writeAttribute(xmlw, "level", "datafile"); + xmlw.writeAttribute("level", "datafile"); xmlw.writeStartElement("labl"); xmlw.writeCharacters(fileJson.getString("filename")); xmlw.writeEndElement(); // labl @@ -1482,9 +1458,9 @@ private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, JsonA // specially formatted notes section: if (fileJson.containsKey("contentType")) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_CONTENTTYPE); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_CONTENTTYPE); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_CONTENTTYPE); + xmlw.writeAttribute("subject", NOTE_SUBJECT_CONTENTTYPE); xmlw.writeCharacters(fileJson.getString("contentType")); xmlw.writeEndElement(); // notes } @@ -1502,33 +1478,7 @@ private static void writeFileDescription(XMLStreamWriter xmlw, FileDTO fileDTo) xmlw.writeEndElement(); // txt } - private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { - for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { - MetadataBlockDTO value = entry.getValue(); - for (FieldDTO fieldDTO : value.getFields()) { - if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { - return fieldDTO.getSinglePrimitive(); - } - } - } - return null; - } - - private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName, Locale locale) { - for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { - MetadataBlockDTO value = entry.getValue(); - for (FieldDTO fieldDTO : value.getFields()) { - if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { - String rawVal = fieldDTO.getSinglePrimitive(); - if (fieldDTO.isControlledVocabularyField()) { - return ControlledVocabularyValue.getLocaleStrValue(rawVal, datasetFieldTypeName, value.getName(), - locale, false); - } - } - } - } - return null; - } + private static List dto2PrimitiveList(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { @@ -1562,104 +1512,6 @@ private static FieldDTO dto2FieldDTO(DatasetVersionDTO datasetVersionDTO, String return null; } - private static void writeFullElementList(XMLStreamWriter xmlw, String name, List values) throws XMLStreamException { - //For the simplest Elements we can - if (values != null && !values.isEmpty()) { - for (String value : values) { - xmlw.writeStartElement(name); - xmlw.writeCharacters(value); - xmlw.writeEndElement(); // labl - } - } - } - - private static void writeI18NElementList(XMLStreamWriter xmlw, String name, List values, - String fieldTypeName, String fieldTypeClass, String metadataBlockName, String lang) - throws XMLStreamException { - - if (values != null && !values.isEmpty()) { - Locale defaultLocale = Locale.getDefault(); - for (String value : values) { - if (fieldTypeClass.equals("controlledVocabulary")) { - String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, defaultLocale, false); - if (localeVal != null) { - - value = localeVal; - writeFullElement(xmlw, name, value, defaultLocale.getLanguage()); - } else { - writeFullElement(xmlw, name, value); - } - } else { - writeFullElement(xmlw, name, value); - } - } - if (lang != null && !defaultLocale.getLanguage().equals(lang)) { - // Get values in dataset metadata language - // Loop before testing fieldTypeClass to be ready for external CVV - for (String value : values) { - if (fieldTypeClass.equals("controlledVocabulary")) { - String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, new Locale(lang), false); - if (localeVal != null) { - writeFullElement(xmlw, name, localeVal, lang); - } - } - } - } - } - } - - private static void writeI18NElement(XMLStreamWriter xmlw, String name, DatasetVersionDTO version, - String fieldTypeName, String lang) throws XMLStreamException { - // Get the default value - String val = dto2Primitive(version, fieldTypeName); - Locale defaultLocale = Locale.getDefault(); - // Get the language-specific value for the default language - // A null value is returned if this is not a CVV field - String localeVal = dto2Primitive(version, fieldTypeName, defaultLocale); - String requestedLocaleVal = null; - if (lang != null && localeVal != null && !defaultLocale.getLanguage().equals(lang)) { - // Also get the value in the requested locale/lang if that's not the default - // lang. - requestedLocaleVal = dto2Primitive(version, fieldTypeName, new Locale(lang)); - } - // FWIW locale-specific vals will only be non-null for CVV values (at present) - if (localeVal == null && requestedLocaleVal == null) { - // Not CVV/no translations so print without lang tag - writeFullElement(xmlw, name, val); - } else { - // Print in either/both languages if we have values - if (localeVal != null) { - // Print the value for the default locale with it's own lang tag - writeFullElement(xmlw, name, localeVal, defaultLocale.getLanguage()); - } - // Also print in the request lang (i.e. the metadata language for the dataset) if a value exists, print it with a lang tag - if (requestedLocaleVal != null) { - writeFullElement(xmlw, name, requestedLocaleVal, lang); - } - } - } - - private static void writeFullElement(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { - writeFullElement(xmlw, name, value, null); - } - - private static void writeFullElement (XMLStreamWriter xmlw, String name, String value, String lang) throws XMLStreamException { - //For the simplest Elements we can - if (!StringUtilisEmpty(value)) { - xmlw.writeStartElement(name); - if(DvObjectContainer.isMetadataLanguageSet(lang)) { - writeAttribute(xmlw, "xml:lang", lang); - } - xmlw.writeCharacters(value); - xmlw.writeEndElement(); // labl - } - } - - private static void writeAttribute(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { - if (!StringUtilisEmpty(value)) { - xmlw.writeAttribute(name, value); - } - } private static boolean StringUtilisEmpty(String str) { if (str == null || str.trim().equals("")) { @@ -1747,14 +1599,14 @@ public static void createDataDscr(XMLStreamWriter xmlw, JsonArray fileDetails) t } private static void createVarGroupDDI(XMLStreamWriter xmlw, JsonObject varGrp) throws XMLStreamException { xmlw.writeStartElement("varGrp"); - writeAttribute(xmlw, "ID", "VG" + varGrp.getJsonNumber("id").toString()); + xmlw.writeAttribute("ID", "VG" + varGrp.getJsonNumber("id").toString()); String vars = ""; JsonArray varsInGroup = varGrp.getJsonArray("dataVariableIds"); for (int j=0;j sumStat : dvar.getJsonObject("summaryStatistics").entrySet()) { xmlw.writeStartElement("sumStat"); - writeAttribute(xmlw, "type", sumStat.getKey()); + XmlWriterUtil.writeAttribute(xmlw, "type", sumStat.getKey()); xmlw.writeCharacters(((JsonString)sumStat.getValue()).getString()); xmlw.writeEndElement(); // sumStat } @@ -1917,7 +1769,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f JsonObject varCat = varCats.getJsonObject(i); xmlw.writeStartElement("catgry"); if (varCat.getBoolean("isMissing")) { - writeAttribute(xmlw, "missing", "Y"); + xmlw.writeAttribute("missing", "Y"); } // catValu @@ -1928,7 +1780,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // label if (varCat.containsKey("label")) { xmlw.writeStartElement("labl"); - writeAttribute(xmlw, "level", "category"); + xmlw.writeAttribute("level", "category"); xmlw.writeCharacters(varCat.getString("label")); xmlw.writeEndElement(); // labl } @@ -1936,7 +1788,7 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // catStat if (varCat.containsKey("frequency")) { xmlw.writeStartElement("catStat"); - writeAttribute(xmlw, "type", "freq"); + xmlw.writeAttribute("type", "freq"); Double freq = varCat.getJsonNumber("frequency").doubleValue(); // if frequency is actually a long value, we want to write "100" instead of // "100.0" @@ -1955,8 +1807,8 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f JsonObject cm = catMetas.getJsonObject(j); if (cm.getString("categoryValue").equals(varCat.getString("value"))) { xmlw.writeStartElement("catStat"); - writeAttribute(xmlw, "wgtd", "wgtd"); - writeAttribute(xmlw, "type", "freq"); + xmlw.writeAttribute("wgtd", "wgtd"); + xmlw.writeAttribute("type", "freq"); xmlw.writeCharacters(cm.getJsonNumber("wFreq").toString()); xmlw.writeEndElement(); // catStat break; @@ -1972,24 +1824,24 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // varFormat xmlw.writeEmptyElement("varFormat"); if(dvar.containsKey("variableFormatType")) { - writeAttribute(xmlw, "type", dvar.getString("variableFormatType").toLowerCase()); + XmlWriterUtil.writeAttribute(xmlw, "type", dvar.getString("variableFormatType").toLowerCase()); } else { throw new XMLStreamException("Illegal Variable Format Type!"); } if(dvar.containsKey("format")) { - writeAttribute(xmlw, "formatname", dvar.getString("format")); + XmlWriterUtil.writeAttribute(xmlw, "formatname", dvar.getString("format")); } //experiment writeAttribute(xmlw, "schema", dv.getFormatSchema()); if(dvar.containsKey("formatCategory")) { - writeAttribute(xmlw, "category", dvar.getString("formatCategory")); + XmlWriterUtil.writeAttribute(xmlw, "category", dvar.getString("formatCategory")); } // notes if (dvar.containsKey("UNF") && !dvar.getString("UNF").isBlank()) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "subject", "Universal Numeric Fingerprint"); - writeAttribute(xmlw, "level", "variable"); - writeAttribute(xmlw, "type", "Dataverse:UNF"); + xmlw.writeAttribute("subject", "Universal Numeric Fingerprint"); + xmlw.writeAttribute("level", "variable"); + xmlw.writeAttribute("type", "Dataverse:UNF"); xmlw.writeCharacters(dvar.getString("UNF")); xmlw.writeEndElement(); //notes } @@ -2020,8 +1872,8 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) } xmlw.writeStartElement("fileDscr"); String fileId = fileJson.getJsonNumber("id").toString(); - writeAttribute(xmlw, "ID", "f" + fileId); - writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileId); + xmlw.writeAttribute("ID", "f" + fileId); + xmlw.writeAttribute("URI", dataverseUrl + "/api/access/datafile/" + fileId); xmlw.writeStartElement("fileTxt"); xmlw.writeStartElement("fileName"); @@ -2064,9 +1916,9 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) // (Universal Numeric Fingerprint) signature: if ((dt!=null) && (dt.containsKey("UNF") && !dt.getString("UNF").isBlank())) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_UNF); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_UNF); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_UNF); + xmlw.writeAttribute("subject", NOTE_SUBJECT_UNF); xmlw.writeCharacters(dt.getString("UNF")); xmlw.writeEndElement(); // notes } @@ -2075,9 +1927,9 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) JsonArray tags = fileJson.getJsonArray("tabularTags"); for (int j = 0; j < tags.size(); j++) { xmlw.writeStartElement("notes"); - writeAttribute(xmlw, "level", LEVEL_FILE); - writeAttribute(xmlw, "type", NOTE_TYPE_TAG); - writeAttribute(xmlw, "subject", NOTE_SUBJECT_TAG); + xmlw.writeAttribute("level", LEVEL_FILE); + xmlw.writeAttribute("type", NOTE_TYPE_TAG); + xmlw.writeAttribute("subject", NOTE_SUBJECT_TAG); xmlw.writeCharacters(tags.getString(j)); xmlw.writeEndElement(); // notes } @@ -2091,13 +1943,7 @@ private static void createFileDscr(XMLStreamWriter xmlw, JsonArray fileDetails) - private static boolean checkParentElement(XMLStreamWriter xmlw, String elementName, boolean elementAdded) throws XMLStreamException { - if (!elementAdded) { - xmlw.writeStartElement(elementName); - } - return true; - } public static void datasetHtmlDDI(InputStream datafile, OutputStream outputStream) throws XMLStreamException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); diff --git a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java new file mode 100644 index 00000000000..e932307d3d0 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java @@ -0,0 +1,174 @@ +package edu.harvard.iq.dataverse.util.xml; + +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; + +import org.apache.commons.lang3.StringUtils; + +import edu.harvard.iq.dataverse.ControlledVocabularyValue; +import edu.harvard.iq.dataverse.DvObjectContainer; +import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; +import edu.harvard.iq.dataverse.api.dto.FieldDTO; +import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; + +public class XmlWriterUtil { + + public static void writeFullElementList(XMLStreamWriter xmlw, String name, List values) throws XMLStreamException { + // For the simplest Elements we can + if (values != null && !values.isEmpty()) { + for (String value : values) { + xmlw.writeStartElement(name); + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + } + + public static void writeI18NElementList(XMLStreamWriter xmlw, String name, List values, + String fieldTypeName, String fieldTypeClass, String metadataBlockName, String lang) + throws XMLStreamException { + + if (values != null && !values.isEmpty()) { + Locale defaultLocale = Locale.getDefault(); + for (String value : values) { + if (fieldTypeClass.equals("controlledVocabulary")) { + String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, defaultLocale, false); + if (localeVal != null) { + + value = localeVal; + writeFullElement(xmlw, name, value, defaultLocale.getLanguage()); + } else { + writeFullElement(xmlw, name, value); + } + } else { + writeFullElement(xmlw, name, value); + } + } + if (lang != null && !defaultLocale.getLanguage().equals(lang)) { + // Get values in dataset metadata language + // Loop before testing fieldTypeClass to be ready for external CVV + for (String value : values) { + if (fieldTypeClass.equals("controlledVocabulary")) { + String localeVal = ControlledVocabularyValue.getLocaleStrValue(value, fieldTypeName, metadataBlockName, new Locale(lang), false); + if (localeVal != null) { + writeFullElement(xmlw, name, localeVal, lang); + } + } + } + } + } + } + + public static void writeI18NElement(XMLStreamWriter xmlw, String name, DatasetVersionDTO version, + String fieldTypeName, String lang) throws XMLStreamException { + // Get the default value + String val = dto2Primitive(version, fieldTypeName); + Locale defaultLocale = Locale.getDefault(); + // Get the language-specific value for the default language + // A null value is returned if this is not a CVV field + String localeVal = dto2Primitive(version, fieldTypeName, defaultLocale); + String requestedLocaleVal = null; + if (lang != null && localeVal != null && !defaultLocale.getLanguage().equals(lang)) { + // Also get the value in the requested locale/lang if that's not the default + // lang. + requestedLocaleVal = dto2Primitive(version, fieldTypeName, new Locale(lang)); + } + // FWIW locale-specific vals will only be non-null for CVV values (at present) + if (localeVal == null && requestedLocaleVal == null) { + // Not CVV/no translations so print without lang tag + writeFullElement(xmlw, name, val); + } else { + // Print in either/both languages if we have values + if (localeVal != null) { + // Print the value for the default locale with it's own lang tag + writeFullElement(xmlw, name, localeVal, defaultLocale.getLanguage()); + } + // Also print in the request lang (i.e. the metadata language for the dataset) + // if a value exists, print it with a lang tag + if (requestedLocaleVal != null) { + writeFullElement(xmlw, name, requestedLocaleVal, lang); + } + } + } + + public static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { + for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { + MetadataBlockDTO value = entry.getValue(); + for (FieldDTO fieldDTO : value.getFields()) { + if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { + return fieldDTO.getSinglePrimitive(); + } + } + } + return null; + } + + public static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName, Locale locale) { + for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { + MetadataBlockDTO value = entry.getValue(); + for (FieldDTO fieldDTO : value.getFields()) { + if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { + String rawVal = fieldDTO.getSinglePrimitive(); + if (fieldDTO.isControlledVocabularyField()) { + return ControlledVocabularyValue.getLocaleStrValue(rawVal, datasetFieldTypeName, value.getName(), + locale, false); + } + } + } + } + return null; + } + + public static void writeFullElement(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { + writeFullElement(xmlw, name, value, null); + } + + public static void writeFullElement(XMLStreamWriter xmlw, String name, String value, String lang) throws XMLStreamException { + // For the simplest Elements we can + if (!StringUtils.isEmpty(value)) { + xmlw.writeStartElement(name); + if (DvObjectContainer.isMetadataLanguageSet(lang)) { + writeAttribute(xmlw, "xml:lang", lang); + } + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + + public static void writeAttribute(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { + if (!StringUtils.isEmpty(value)) { + xmlw.writeAttribute(name, value); + } + } + + public static boolean checkParentElement(XMLStreamWriter xmlw, String elementName, boolean elementAdded) throws XMLStreamException { + if (!elementAdded) { + xmlw.writeStartElement(elementName); + } + + return true; + } + + public static void writeFullElementWithAttributes(XMLStreamWriter xmlw, String name, Map attributeMap, String value) throws XMLStreamException { + if (!StringUtils.isEmpty(value)) { + xmlw.writeStartElement(name); + for (String key : attributeMap.keySet()) { + writeAttribute(xmlw, key, attributeMap.get(key)); + } + xmlw.writeCharacters(value); + xmlw.writeEndElement(); // labl + } + } + + public static boolean writeOpenTagIfNeeded(XMLStreamWriter xmlw, String tag, boolean element_check) throws XMLStreamException { + // check if the current tag isn't opened + if (!element_check) { + xmlw.writeStartElement(tag); // + } + return true; + } +} From 1a46155a5ed37545455a194650301cbee5691358 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:32:21 -0400 Subject: [PATCH 003/105] note duplicate method --- .../iq/dataverse/export/openaire/OpenAireExportUtil.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index 49ceabc5900..820ced3d6c4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -1428,6 +1428,8 @@ public static void writeFundingReferencesElement(XMLStreamWriter xmlw, DatasetVe writeEndTag(xmlw, fundingReference_check); } + + //Duplicates XmlWriterUtil.dto2Primitive private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { // give the single value of the given metadata for (Map.Entry entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { From ace656ce890d6bd4ecb1b7000995e0934a2c214e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:33:13 -0400 Subject: [PATCH 004/105] remove xml template doc, refactor to generate xml, adding OA fields --- .../pidproviders/doi/XmlMetadataTemplate.java | 819 +++++++++++++----- .../doi/datacite_metadata_template.xml | 2 +- 2 files changed, 617 insertions(+), 204 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 30e4dfd79cc..8a5fe9f9d32 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1,208 +1,599 @@ package edu.harvard.iq.dataverse.pidproviders.doi; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.net.MalformedURLException; +import java.net.URL; import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; import java.util.List; -import java.util.logging.Level; +import java.util.Map; +import java.util.Optional; import java.util.logging.Logger; +import java.util.stream.Collectors; +import javax.xml.stream.XMLOutputFactory; +import javax.xml.stream.XMLStreamException; +import javax.xml.stream.XMLStreamWriter; + +import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; +import org.ocpsoft.common.util.Strings; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetAuthor; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; +import edu.harvard.iq.dataverse.DatasetFieldConstant; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.api.dto.DatasetDTO; +import edu.harvard.iq.dataverse.api.dto.FieldDTO; +import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; +import edu.harvard.iq.dataverse.export.DDIExporter; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; +import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; +import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; +import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; +import edu.harvard.iq.dataverse.util.StringUtil; +import edu.harvard.iq.dataverse.util.xml.XmlPrinter; +import edu.harvard.iq.dataverse.util.xml.XmlWriterUtil; +import jakarta.json.JsonObject; public class XmlMetadataTemplate { - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.DataCiteMetadataTemplate"); - private static String template; - - static { - try (InputStream in = XmlMetadataTemplate.class.getResourceAsStream("datacite_metadata_template.xml")) { - template = new String(in.readAllBytes(), StandardCharsets.UTF_8); - } catch (Exception e) { - logger.log(Level.SEVERE, "datacite metadata template load error"); - logger.log(Level.SEVERE, "String " + e.toString()); - logger.log(Level.SEVERE, "localized message " + e.getLocalizedMessage()); - logger.log(Level.SEVERE, "cause " + e.getCause()); - logger.log(Level.SEVERE, "message " + e.getMessage()); - } - } + private static final Logger logger = Logger.getLogger(XmlMetadataTemplate.class.getName()); - private String xmlMetadata; - private String identifier; - private List datafileIdentifiers; - private List creators; - private String title; - private String publisher; - private String publisherYear; - private List authors; - private String description; - private List contacts; - private List producers; - - public List getProducers() { - return producers; - } + public static final String XML_NAMESPACE = "http://datacite.org/schema/kernel-4"; + public static final String XML_SCHEMA_LOCATION = "http://datacite.org/schema/kernel-4 http://schema.datacite.org/meta/kernel-4.5/metadata.xsd"; + public static final String XML_XSI = "http://www.w3.org/2001/XMLSchema-instance"; + public static final String XML_SCHEMA_VERSION = "4.5"; - public void setProducers(List producers) { - this.producers = producers; - } + private DoiMetadata doiMetadata; - public List getContacts() { - return contacts; + public XmlMetadataTemplate() { } - public void setContacts(List contacts) { - this.contacts = contacts; + public XmlMetadataTemplate(DoiMetadata doiMetadata) { + this.doiMetadata = doiMetadata; } - public String getDescription() { - return description; + public String generateXML(DvObject dvObject) { + try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) { + generateXML(dvObject, outputStream); + + String xml = outputStream.toString(); + return XmlPrinter.prettyPrintXml(xml); + } catch (XMLStreamException | IOException e) { + logger.severe("Unable to generate DataCite XML for DOI: " + dvObject.getGlobalId().asString() + " : " + e.getMessage()); + e.printStackTrace(); + } + return null; } - public void setDescription(String description) { - this.description = description; - } + private void generateXML(DvObject dvObject, OutputStream outputStream) throws XMLStreamException { + // Could/should use dataset metadata language for metadata from DvObject itself? + String language = null; // machine locale? e.g. for Publisher which is global + String metadataLanguage = null; // when set, otherwise = language? + XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); + xmlw.writeStartElement("resource"); + xmlw.writeDefaultNamespace(XML_NAMESPACE); + xmlw.writeAttribute("xmlns:xsi", XML_XSI); + xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); + + writeIdentifier(xmlw, dvObject); + writeCreators(xmlw, doiMetadata.getAuthors()); + writeTitles(xmlw, dvObject, language); + writePublisher(xmlw, dvObject); + writePublicationYear(xmlw, dvObject); + writeSubjects(xmlw, dvObject); + writeContributors(xmlw, dvObject); + writeDates(xmlw, dvObject); + writeLanguage(xmlw, dvObject); + writeResourceType(xmlw, dvObject); + writeAlternateIdentifiers(xmlw, dvObject); + writeRelatedIdentifiers(xmlw, dvObject); + writeSize(xmlw, dvObject); + writeFormats(xmlw, dvObject); + writeVersion(xmlw, dvObject); + writeAccessRights(xmlw, dvObject); + writeDescriptions(xmlw, dvObject); + writeGeoLocations(xmlw, dvObject); + writeFundingReferences(xmlw, dvObject); - public List getAuthors() { - return authors; - } + StringBuilder contributorsElement = new StringBuilder(); + if (doiMetadata.getContacts() != null) { + for (String[] contact : doiMetadata.getContacts()) { + if (!contact[0].isEmpty()) { + contributorsElement.append("" + + StringEscapeUtils.escapeXml10(contact[0]) + ""); + if (!contact[1].isEmpty()) { + contributorsElement.append("" + StringEscapeUtils.escapeXml10(contact[1]) + ""); + } + contributorsElement.append(""); + } + } + } - public void setAuthors(List authors) { - this.authors = authors; - } + if (doiMetadata.getProducers() != null) { + for (String[] producer : doiMetadata.getProducers()) { + contributorsElement.append("" + StringEscapeUtils.escapeXml10(producer[0]) + + ""); + if (!producer[1].isEmpty()) { + contributorsElement.append("" + StringEscapeUtils.escapeXml10(producer[1]) + ""); + } + contributorsElement.append(""); + } + } - public XmlMetadataTemplate() { - } + String relIdentifiers = generateRelatedIdentifiers(dvObject); - public List getDatafileIdentifiers() { - return datafileIdentifiers; } - public void setDatafileIdentifiers(List datafileIdentifiers) { - this.datafileIdentifiers = datafileIdentifiers; - } - public XmlMetadataTemplate(String xmlMetaData) { - this.xmlMetadata = xmlMetaData; - Document doc = Jsoup.parseBodyFragment(xmlMetaData); - Elements identifierElements = doc.select("identifier"); - if (identifierElements.size() > 0) { - identifier = identifierElements.get(0).html(); - } - Elements creatorElements = doc.select("creatorName"); - creators = new ArrayList<>(); - for (Element creatorElement : creatorElements) { - creators.add(creatorElement.html()); + /** + * 3, Title(s) (with optional type sub-properties) (M) + * + * @param xmlw + * The Stream writer + * @param dvObject + * The dataset/file + * @param language + * the metadata language + * @return + * @throws XMLStreamException + */ + private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language) throws XMLStreamException { + String title = doiMetadata.getTitle(); + String subTitle = null; + List altTitles = null; + // Only Datasets can have a subtitle or alternative titles + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersion(); + Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); + if (subTitleField.isPresent()) { + subTitle = subTitleField.get().getValue(); + } + Optional altTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.alternativeTitle)).findFirst(); + if (altTitleField.isPresent()) { + altTitles = altTitleField.get().getValues(); + } } - Elements titleElements = doc.select("title"); - if (titleElements.size() > 0) { - title = titleElements.get(0).html(); + + if (StringUtils.isNotBlank(title) || StringUtils.isNotBlank(subTitle) || (altTitles != null && !String.join("", altTitles).isBlank())) { + xmlw.writeStartElement("titles"); + XmlWriterUtil.writeFullElement(xmlw, "title", title, language); + + Map attributes = new HashMap(); + attributes.put("titleType", "Subtitle"); + + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, title); + + attributes.clear(); + attributes.put("titleType", "AlternativeTitle"); + + for (String altTitle : altTitles) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, altTitle); + } + + xmlw.writeEndElement(); } - Elements publisherElements = doc.select("publisher"); - if (publisherElements.size() > 0) { - publisher = publisherElements.get(0).html(); + } + + /** + * 1, Identifier (with mandatory type sub-property) (M) Note DataCite expects + * identifierType="DOI" but OpenAire allows several others (see + * https://guidelines.readthedocs.io/en/latest/data/field_identifier.html#d-identifiertype) + * Dataverse is currently only capable of creating DOI, Handle, or URL types + * from the OpenAire list (the last from PermaLinks) ToDo - If we add,e.g., an + * ARK or PURL provider, this code has to change or we'll need to refactor so + * that the identifiertype and id value can be sent via the JSON/ORE + * + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset or file with the PID + * @throws XMLStreamException + */ + private void writeIdentifier(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + GlobalId pid = dvObject.getGlobalId(); + // identifier with identifierType attribute + Map identifier_map = new HashMap(); + String identifierType = null; + String identifier = null; + switch (pid.getProtocol()) { + case AbstractDOIProvider.DOI_PROTOCOL: + identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); + identifier = pid.asRawIdentifier(); + break; + case HandlePidProvider.HDL_PROTOCOL: + identifierType = "Handle"; + identifier = pid.asRawIdentifier(); + break; + case PermaLinkPidProvider.PERMA_PROTOCOL: + identifierType = "URL"; + identifier = pid.asURL(); + break; } - Elements publisherYearElements = doc.select("publicationYear"); - if (publisherYearElements.size() > 0) { - publisherYear = publisherYearElements.get(0).html(); + Map attributeMap = new HashMap(); + attributeMap.put("identifierType", identifierType); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "identifier", attributeMap, identifier); + } + + /** + * 2, Creator (with optional given name, family name, name identifier and + * affiliation sub-properties) (M) + * + * @param xmlw + * The stream writer + * @param authorList + * - the list of authors + * @throws XMLStreamException + */ + public void writeCreators(XMLStreamWriter xmlw, List authorList) throws XMLStreamException { + // creators -> creator -> creatorName with nameType attribute, givenName, + // familyName, nameIdentifier + // write all creators + xmlw.writeStartElement("creators"); // + + if (authorList != null && !authorList.isEmpty()) { + for (DatasetAuthor author : authorList) { + String creatorName = StringEscapeUtils.escapeXml10(author.getName().getDisplayValue()); + String affiliation = null; + if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { + affiliation = StringEscapeUtils.escapeXml10(author.getAffiliation().getDisplayValue()); + } + String nameIdentifier = null; + String nameIdentifierScheme = null; + if (StringUtils.isNotBlank(author.getIdValue()) && StringUtils.isNotBlank(author.getIdType())) { + nameIdentifier = author.getIdValue(); + if(nameIdentifier != null) { + // Normalizes to the URL form of the identifier, returns null if the identifier + // is not valid given the type + nameIdentifier = author.getIdentifierAsUrl(); + } + nameIdentifierScheme = author.getIdType(); + } + + if (StringUtils.isNotBlank(creatorName)) { + xmlw.writeStartElement("creator"); // + JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, false, + StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid")); + + writeEntityElements(xmlw, "creator", null, creatorObj, affiliation, nameIdentifier, nameIdentifierScheme); + xmlw.writeEndElement(); // + } + + else { + // Authors unavailable + XmlWriterUtil.writeFullElement(xmlw, "creator", "creatorName", AbstractPidProvider.UNAVAILABLE); + } + } } + xmlw.writeEndElement(); // } - public String generateXML(DvObject dvObject) { + private void writePublisher(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // publisher should already be non null - :unav if it wasn't available + XmlWriterUtil.writeFullElement(xmlw, "publisher", doiMetadata.getPublisher()); + } + + private void writePublicationYear(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { // Can't use "UNKNOWN" here because DataCite will respond with "[facet // 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" - String publisherYearFinal = "9999"; + String pubYear = "9999"; // FIXME: Investigate why this.publisherYear is sometimes null now that pull // request #4606 has been merged. - if (this.publisherYear != null) { + if (doiMetadata.getPublisherYear() != null) { // Added to prevent a NullPointerException when trying to destroy datasets when // using DataCite rather than EZID. - publisherYearFinal = this.publisherYear; + pubYear = doiMetadata.getPublisherYear(); } - xmlMetadata = template.replace("${identifier}", getIdentifier().trim()).replace("${title}", this.title) - .replace("${publisher}", this.publisher).replace("${publisherYear}", publisherYearFinal) - .replace("${description}", this.description); - - StringBuilder creatorsElement = new StringBuilder(); - if (authors != null && !authors.isEmpty()) { - for (DatasetAuthor author : authors) { - creatorsElement.append(""); - creatorsElement.append(author.getName().getDisplayValue()); - creatorsElement.append(""); - - if (author.getIdType() != null && author.getIdValue() != null && !author.getIdType().isEmpty() - && !author.getIdValue().isEmpty() && author.getAffiliation() != null - && !author.getAffiliation().getDisplayValue().isEmpty()) { - - if (author.getIdType().equals("ORCID")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - if (author.getIdType().equals("ISNI")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - if (author.getIdType().equals("LCNA")) { - creatorsElement.append( - "" - + author.getIdValue() + ""); - } - } - if (author.getAffiliation() != null && !author.getAffiliation().getDisplayValue().isEmpty()) { - creatorsElement - .append("" + author.getAffiliation().getDisplayValue() + ""); + XmlWriterUtil.writeFullElement(xmlw, "publicationYear", String.valueOf(pubYear)); + } + + /** + * 6, Subject (with scheme sub-property) R + * + * @param xmlw + * The Steam writer + * @param dvObject + * The Dataset/DataFile + * @throws XMLStreamException + */ + private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // subjects -> subject with subjectScheme and schemeURI attributes when + // available + boolean subjectsCreated = false; + List subjects = null; + List compoundKeywords = null; + List compoundTopics = null; + // Dataset Subject= Dataverse subject, keyword, and/or topic classification + // fields + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + dv.getDatasetSubjects(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.keyword)) { + compoundKeywords = dsf.getDatasetFieldCompoundValues(); + } else if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassification)) { + compoundTopics = dsf.getDatasetFieldCompoundValues(); } - creatorsElement.append(""); } - } else { - creatorsElement.append("").append(AbstractPidProvider.UNAVAILABLE) - .append(""); + } else if (dvObject instanceof DataFile df) { + subjects = df.getTagLabels(); + } + for (String subject : subjects) { + if (StringUtils.isNotBlank(subject)) { + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElement(xmlw, "subject", StringEscapeUtils.escapeXml10(subject)); + } } + for (DatasetFieldCompoundValue keywordFieldValue : compoundKeywords) { + String keyword = null; + String scheme = null; + String schemeUri = null; + + for (DatasetField subField : keywordFieldValue.getChildDatasetFields()) { + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.keyword: + keyword = subField.getValue(); + break; + case DatasetFieldConstant.keywordVocab: + scheme = subField.getValue(); + break; + case DatasetFieldConstant.keywordVocabURI: + schemeUri = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(keyword)) { + Map attributesMap = new HashMap(); + if (StringUtils.isNotBlank(scheme)) { + attributesMap.put("subjectScheme", scheme); + } + if (StringUtils.isNotBlank(schemeUri)) { + attributesMap.put("schemeURI", schemeUri); + } + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "subject", attributesMap, StringEscapeUtils.escapeXml10(keyword)); + } + } + for (DatasetFieldCompoundValue topicFieldValue : compoundTopics) { + String topic = null; + String scheme = null; + String schemeUri = null; + + for (DatasetField subField : topicFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.topicClassValue: + topic = subField.getValue(); + break; + case DatasetFieldConstant.topicClassVocab: + scheme = subField.getValue(); + break; + case DatasetFieldConstant.topicClassVocabURI: + schemeUri = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(topic)) { + Map attributesMap = new HashMap(); + if (StringUtils.isNotBlank(scheme)) { + attributesMap.put("subjectScheme", scheme); + } + if (StringUtils.isNotBlank(schemeUri)) { + attributesMap.put("schemeURI", schemeUri); + } + subjectsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "subjects", subjectsCreated); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "subject", attributesMap, StringEscapeUtils.escapeXml10(topic)); + } + } + if (subjectsCreated) { + xmlw.writeEndElement(); + } + } - xmlMetadata = xmlMetadata.replace("${creators}", creatorsElement.toString()); + /** + * 7, Contributor (with optional given name, family name, name identifier + * and affiliation sub-properties) + * + * @see #writeContributorElement(javax.xml.stream.XMLStreamWriter, + * java.lang.String, java.lang.String, java.lang.String) + * + * @param xmlw The stream writer + * @param dvObject The Dataset/DataFile + * @throws XMLStreamException + */ + private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean contributorsCreated = false; + List compoundProducers = null; + List compoundDistributors = null; + List compoundContacts = null; + List compoundContributors = null; + // Dataset Subject= Dataverse subject, keyword, and/or topic classification + // fields + //ToDo Include for files? + /*if(dvObject instanceof DataFile df) { + dvObject = df.getOwner(); + }*/ + + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.producer: + compoundProducers = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.distributor: + compoundDistributors = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.contact: + compoundContacts = dsf.getDatasetFieldCompoundValues(); + break; + case DatasetFieldConstant.contributor: + compoundContributors = dsf.getDatasetFieldCompoundValues(); + } + } + } + + + for (DatasetFieldCompoundValue producerFieldValue : compoundProducers) { + String producer = null; + String affiliation = null; + + for (DatasetField subField : producerFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.producerName: + producer = subField.getValue(); + break; + case DatasetFieldConstant.producerAffiliation: + affiliation = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(producer)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(producer, false, false); + writeEntityElements(xmlw, "contributor", "Producer", entityObject, affiliation, null, null); + } - StringBuilder contributorsElement = new StringBuilder(); - if (this.getContacts() != null) { - for (String[] contact : this.getContacts()) { - if (!contact[0].isEmpty()) { - contributorsElement.append("" - + contact[0] + ""); - if (!contact[1].isEmpty()) { - contributorsElement.append("" + contact[1] + ""); - } - contributorsElement.append(""); + } + + for (DatasetFieldCompoundValue distributorFieldValue : compoundDistributors) { + String distributor = null; + String affiliation = null; + + for (DatasetField subField : distributorFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.distributorName: + distributor = subField.getValue(); + break; + case DatasetFieldConstant.distributorAffiliation: + affiliation = subField.getValue(); + break; } } + if (StringUtils.isNotBlank(distributor)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(distributor, false, false); + writeEntityElements(xmlw, "contributor", "Distributor", entityObject, affiliation, null, null); + } + } + for (DatasetFieldCompoundValue contactFieldValue : compoundContacts) { + String contact = null; + String affiliation = null; + + for (DatasetField subField : contactFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.datasetContactName: + contact = subField.getValue(); + break; + case DatasetFieldConstant.datasetContactAffiliation: + affiliation = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(contact)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(contact, false, false); + writeEntityElements(xmlw, "contributor", "ContactPerson", entityObject, affiliation, null, null); + } - if (this.getProducers() != null) { - for (String[] producer : this.getProducers()) { - contributorsElement.append("" + producer[0] - + ""); - if (!producer[1].isEmpty()) { - contributorsElement.append("" + producer[1] + ""); + } + for (DatasetFieldCompoundValue contributorFieldValue : compoundContributors) { + String contributor = null; + String contributorType = null; + + for (DatasetField subField : contributorFieldValue.getChildDatasetFields()) { + + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.contributorName: + contributor = subField.getValue(); + break; + case DatasetFieldConstant.contributorType: + contributorType = subField.getValue().replace(" ", ""); + break; } - contributorsElement.append(""); } + // QDR - doesn't have Funder in the contributor type list. + // Using a string isn't i18n + if (StringUtils.isNotBlank(contributor) && !StringUtils.equalsIgnoreCase("Funder", contributorType)) { + contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); + JsonObject entityObject = PersonOrOrgUtil.getPersonOrOrganization(contributor, false, false); + writeEntityElements(xmlw, "contributor", contributorType, entityObject, null, null, null); + } + + } + + if (contributorsCreated) { + xmlw.writeEndElement(); } + } - String relIdentifiers = generateRelatedIdentifiers(dvObject); + private void writeEntityElements(XMLStreamWriter xmlw, String elementName, String type, JsonObject entityObject, String affiliation, String nameIdentifier, String nameIdentifierScheme) throws XMLStreamException { + xmlw.writeStartElement(elementName); + Map attributeMap = new HashMap(); + if (StringUtils.isNotBlank(type)) { + attributeMap.put("contributorType", type); + } + // person name=, + if (entityObject.getBoolean("isPerson")) { + attributeMap.put("nameType", "Personal"); + } else { + attributeMap.put("nameType", "Organizational"); + } + XmlWriterUtil.writeFullElementWithAttributes(xmlw, elementName + "Name", attributeMap, + StringEscapeUtils.escapeXml10(entityObject.getString("fullName"))); + if (entityObject.containsKey("givenName")) { + XmlWriterUtil.writeFullElement(xmlw, "givenName", StringEscapeUtils.escapeXml10(entityObject.getString("givenName"))); + } + if (entityObject.containsKey("familyName")) { + XmlWriterUtil.writeFullElement(xmlw, "familyName", StringEscapeUtils.escapeXml10(entityObject.getString("familyName"))); + } - xmlMetadata = xmlMetadata.replace("${relatedIdentifiers}", relIdentifiers); + if (nameIdentifier != null) { + attributeMap.clear(); + URL url; + try { + url = new URL(nameIdentifier); + String protocol = url.getProtocol(); + String authority = url.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + attributeMap.put("schemeURI", site); + attributeMap.put("nameIdentifierScheme", nameIdentifierScheme); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "nameIdentifier", attributeMap, nameIdentifier); + } catch (MalformedURLException e) { + logger.warning("DatasetAuthor.getIdentifierAsUrl returned a Malformed URL: " + nameIdentifier); + } + } + + if (StringUtils.isNotBlank(affiliation)) { + attributeMap.clear(); + if (affiliation.startsWith("https://ror.org/")) { - xmlMetadata = xmlMetadata.replace("{$contributors}", contributorsElement.toString()); - return xmlMetadata; + attributeMap.put("schemeURI", "https://ror.org"); + attributeMap.put("affiliationIdentifierScheme", "ROR"); + } + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "affiliation", attributeMap, StringEscapeUtils.escapeXml10(affiliation)); + } + xmlw.writeEndElement(); } private String generateRelatedIdentifiers(DvObject dvObject) { @@ -210,9 +601,67 @@ private String generateRelatedIdentifiers(DvObject dvObject) { StringBuilder sb = new StringBuilder(); if (dvObject.isInstanceofDataset()) { Dataset dataset = (Dataset) dvObject; + + List relatedPublications = dataset.getLatestVersionForCopy().getRelatedPublications(); + if (!relatedPublications.isEmpty()) { + for (DatasetRelPublication relatedPub : relatedPublications) { + String pubIdType = relatedPub.getIdType(); + String identifier = relatedPub.getIdNumber(); + /* + * Note - with identifier and url fields, it's not clear that there's a single + * way those two fields are used for all identifier types In QDR, at this time, + * doi and isbn types always have the raw number in the identifier field, + * whereas there are examples where URLs are in the identifier or url fields. + * The code here addresses those practices and is not generic. + */ + if (pubIdType != null) { + switch (pubIdType) { + case "doi": + if (identifier != null && identifier.length() != 0) { + appendIdentifier(sb, "DOI", "IsSupplementTo", "doi:" + identifier); + } + break; + case "isbn": + if (identifier != null && identifier.length() != 0) { + appendIdentifier(sb, "ISBN", "IsSupplementTo", "ISBN:" + identifier); + } + break; + case "url": + if (identifier != null && identifier.length() != 0) { + appendIdentifier(sb, "URL", "IsSupplementTo", identifier); + } else { + String pubUrl = relatedPub.getUrl(); + if (pubUrl != null && pubUrl.length() > 0) { + appendIdentifier(sb, "URL", "IsSupplementTo", pubUrl); + } + } + break; + default: + if (identifier != null && identifier.length() != 0) { + if (pubIdType.equalsIgnoreCase("arXiv")) { + pubIdType = "arXiv"; + } else if (pubIdType.equalsIgnoreCase("handle")) { + // Initial cap required for handle + pubIdType = "Handle"; + } else if (!pubIdType.equals("bibcode")) { + pubIdType = pubIdType.toUpperCase(); + } + // For all others, do a generic attempt to match the identifier type to the + // datacite schema and send the raw identifier as the value + appendIdentifier(sb, pubIdType, "IsSupplementTo", identifier); + } + break; + } + + } else { + logger.info(relatedPub.getIdNumber() + relatedPub.getUrl() + relatedPub.getTitle()); + } + } + } + if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - datafileIdentifiers = new ArrayList<>(); + List datafileIdentifiers = new ArrayList<>(); for (DataFile dataFile : dataset.getFiles()) { if (dataFile.getGlobalId() != null) { if (sb.toString().isEmpty()) { @@ -229,14 +678,23 @@ private String generateRelatedIdentifiers(DvObject dvObject) { } } else if (dvObject.isInstanceofDataFile()) { DataFile df = (DataFile) dvObject; - sb.append(""); - sb.append("" - + df.getOwner().getGlobalId() + ""); - sb.append(""); + appendIdentifier(sb, "DOI", "IsPartOf", df.getOwner().getGlobalId().asString()); + if (sb.length() != 0) { + // Should always be true + sb.append(""); + } } return sb.toString(); } + + private void appendIdentifier(StringBuilder sb, String idType, String relationType, String identifier) { + if (sb.toString().isEmpty()) { + sb.append(""); + } + sb.append("" + identifier + ""); + } + public void generateFileIdentifiers(DvObject dvObject) { if (dvObject.isInstanceofDataset()) { @@ -244,71 +702,26 @@ public void generateFileIdentifiers(DvObject dvObject) { if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - datafileIdentifiers = new ArrayList<>(); + List datafileIdentifiers = new ArrayList<>(); for (DataFile dataFile : dataset.getFiles()) { datafileIdentifiers.add(dataFile.getIdentifier()); - int x = xmlMetadata.indexOf("") - 1; - xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", dataFile.getIdentifier()); - xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" - + template.substring(x, template.length() - 1); + // int x = xmlMetadata.indexOf("") - 1; + // xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", + // dataFile.getIdentifier()); + // xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" + // + template.substring(x, template.length() - 1); } } else { - xmlMetadata = xmlMetadata.replace( - "${relatedIdentifier}", - ""); + // xmlMetadata = xmlMetadata.replace( + // "${relatedIdentifier}", + // ""); } } } - public static String getTemplate() { - return template; - } - - public static void setTemplate(String template) { - XmlMetadataTemplate.template = template; - } - - public String getIdentifier() { - return identifier; - } - - public void setIdentifier(String identifier) { - this.identifier = identifier; - } - - public List getCreators() { - return creators; - } - - public void setCreators(List creators) { - this.creators = creators; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getPublisher() { - return publisher; - } - - public void setPublisher(String publisher) { - this.publisher = publisher; - } - - public String getPublisherYear() { - return publisherYear; - } - - public void setPublisherYear(String publisherYear) { - this.publisherYear = publisherYear; - } - } \ No newline at end of file diff --git a/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml b/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml index abe7ce79972..8348691d4c7 100644 --- a/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml +++ b/src/main/resources/edu/harvard/iq/dataverse/pidproviders/doi/datacite_metadata_template.xml @@ -1,5 +1,5 @@ - ${identifier} From dba03e2bb1597d4e01317139d950e305d0d9dec5 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:33:40 -0400 Subject: [PATCH 005/105] refactor source of XML info --- .../iq/dataverse/export/DataCiteExporter.java | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java b/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java index 8caf32b2df0..c21d6b5cd1a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/DataCiteExporter.java @@ -7,6 +7,7 @@ import io.gdcc.spi.export.ExportException; import io.gdcc.spi.export.Exporter; import io.gdcc.spi.export.XMLExporter; +import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; import edu.harvard.iq.dataverse.util.BundleUtil; import java.io.IOException; import java.io.OutputStream; @@ -20,11 +21,7 @@ */ @AutoService(Exporter.class) public class DataCiteExporter implements XMLExporter { - - private static String DEFAULT_XML_NAMESPACE = "http://datacite.org/schema/kernel-3"; - private static String DEFAULT_XML_SCHEMALOCATION = "http://datacite.org/schema/kernel-3 http://schema.datacite.org/meta/kernel-3/metadata.xsd"; - private static String DEFAULT_XML_VERSION = "3.0"; - + public static final String NAME = "Datacite"; @Override @@ -60,17 +57,17 @@ public Boolean isAvailableToUsers() { @Override public String getXMLNameSpace() { - return DataCiteExporter.DEFAULT_XML_NAMESPACE; + return XmlMetadataTemplate.XML_NAMESPACE; } @Override public String getXMLSchemaLocation() { - return DataCiteExporter.DEFAULT_XML_SCHEMALOCATION; + return XmlMetadataTemplate.XML_SCHEMA_LOCATION; } @Override public String getXMLSchemaVersion() { - return DataCiteExporter.DEFAULT_XML_VERSION; + return XmlMetadataTemplate.XML_SCHEMA_VERSION; } } From af3e24b0b7bc1bff2c378f2a682455fe6aef0ee2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 10:34:09 -0400 Subject: [PATCH 006/105] add code to get raw alphanumeric pid value --- src/main/java/edu/harvard/iq/dataverse/GlobalId.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java index a542cb52ac0..1c8783c5bd5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/GlobalId.java +++ b/src/main/java/edu/harvard/iq/dataverse/GlobalId.java @@ -100,6 +100,13 @@ public String asURL() { } return null; } + + public String asRawIdentifier() { + if (protocol == null || authority == null || identifier == null) { + return ""; + } + return authority + separator + identifier; + } From fa23884647c893285e456d749a741d6d36ac90eb Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 13:58:32 -0400 Subject: [PATCH 007/105] remove duplicate method --- .../edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java index e932307d3d0..8ec426ead1f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlWriterUtil.java @@ -145,13 +145,6 @@ public static void writeAttribute(XMLStreamWriter xmlw, String name, String valu } } - public static boolean checkParentElement(XMLStreamWriter xmlw, String elementName, boolean elementAdded) throws XMLStreamException { - if (!elementAdded) { - xmlw.writeStartElement(elementName); - } - - return true; - } public static void writeFullElementWithAttributes(XMLStreamWriter xmlw, String name, Map attributeMap, String value) throws XMLStreamException { if (!StringUtils.isEmpty(value)) { From 0d22d6c580df4aa689b019dfdc88321a59e02e4d Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 3 May 2024 13:59:22 -0400 Subject: [PATCH 008/105] dates, resourceType, alternate Ids --- .../pidproviders/doi/XmlMetadataTemplate.java | 224 +++++++++++++++++- 1 file changed, 215 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 8a5fe9f9d32..92bf7afd273 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -6,12 +6,14 @@ import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.logging.Logger; import java.util.stream.Collectors; @@ -26,6 +28,8 @@ import org.jsoup.select.Elements; import org.ocpsoft.common.util.Strings; +import edu.harvard.iq.dataverse.AlternativePersistentIdentifier; +import edu.harvard.iq.dataverse.ControlledVocabularyValue; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetAuthor; @@ -33,9 +37,11 @@ import edu.harvard.iq.dataverse.DatasetFieldCompoundValue; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetFieldValue; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; @@ -207,8 +213,6 @@ private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String languag */ private void writeIdentifier(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { GlobalId pid = dvObject.getGlobalId(); - // identifier with identifierType attribute - Map identifier_map = new HashMap(); String identifierType = null; String identifier = null; switch (pid.getProtocol()) { @@ -315,9 +319,9 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt // subjects -> subject with subjectScheme and schemeURI attributes when // available boolean subjectsCreated = false; - List subjects = null; - List compoundKeywords = null; - List compoundTopics = null; + List subjects = new ArrayList(); + List compoundKeywords = new ArrayList(); + List compoundTopics = new ArrayList(); // Dataset Subject= Dataverse subject, keyword, and/or topic classification // fields if (dvObject instanceof Dataset d) { @@ -419,10 +423,10 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt */ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { boolean contributorsCreated = false; - List compoundProducers = null; - List compoundDistributors = null; - List compoundContacts = null; - List compoundContributors = null; + List compoundProducers = new ArrayList(); + List compoundDistributors = new ArrayList(); + List compoundContacts = new ArrayList(); + List compoundContributors = new ArrayList(); // Dataset Subject= Dataverse subject, keyword, and/or topic classification // fields //ToDo Include for files? @@ -596,6 +600,208 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin xmlw.writeEndElement(); } + /** + * 8, Date (with type sub-property) (R) + * + * @param xmlw The Steam writer + * @param dvObject The dataset/datafile + * @throws XMLStreamException + */ + private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean datesWritten = false; + String dateOfDistribution = null; + String dateOfProduction = null; + String dateOfDeposit = null; + Date releaseDate = null; + List datesOfCollection = new ArrayList(); + + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + releaseDate = dv.getReleaseTime(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.distributionDate: + dateOfDistribution = dsf.getValue(); + break; + case DatasetFieldConstant.productionDate: + dateOfProduction = dsf.getValue(); + break; + case DatasetFieldConstant.dateOfDeposit: + dateOfDeposit = dsf.getValue(); + break; + case DatasetFieldConstant.dateOfCollection: + datesOfCollection = dsf.getDatasetFieldCompoundValues(); + } + } + } + Map attributes = new HashMap(); + if (StringUtils.isNotBlank(dateOfDistribution)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Issued"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfDistribution); + } + // dates -> date with dateType attribute + + if (StringUtils.isNotBlank(dateOfProduction)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Created"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfProduction); + } + if (StringUtils.isNotBlank(dateOfDeposit)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Submitted"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, dateOfDeposit); + } + + if (releaseDate != null) { + String date = Util.getDateTimeFormat().format(releaseDate); + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + + attributes.put("dateType", "Available"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, date); + } + if (datesOfCollection != null) { + for (DatasetFieldCompoundValue collectionDateFieldValue : datesOfCollection) { + String startDate = null; + String endDate = null; + + for (DatasetField subField : collectionDateFieldValue.getChildDatasetFields()) { + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.dateOfCollectionStart: + startDate = subField.getValue(); + break; + case DatasetFieldConstant.dateOfCollectionEnd: + endDate = subField.getValue(); + break; + } + } + if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); + attributes.put("dateType", "Collected"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); + } + } + } + if (datesWritten) { + xmlw.writeEndElement(); + } + } + + + // 9, Language (MA), language + private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + //Currently not supported. Spec indicates one 'primary' language. Could send the first entry in DatasetFieldConstant.language or send iff there is only one entry, and/or default to the machine's default lang? + return; + } + + // 10, ResourceType (with mandatory general type + // description sub- property) (M) + private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + List kindOfDataValues = new ArrayList(); + Map attributes = new HashMap(); + + attributes.put("resourceTypeGeneral", "Dataset"); + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.kindOfData: + kindOfDataValues = dsf.getControlledVocabularyValues(); + break; + } + + if (kindOfDataValues.isEmpty()) { + // Write an attribute only element if there are no kindOfData values. + xmlw.writeStartElement("resourceType"); + xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); + xmlw.writeEndElement(); + } else { + for (ControlledVocabularyValue kindOfDataValue : kindOfDataValues) { + String resourceType = kindOfDataValue.getStrValue(); + if (StringUtils.isNotBlank(resourceType)) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); + } + } + } + } + } + } + + /** + * 11 AlternateIdentifier (with type sub-property) (O) + * + * @param xmlw The Steam writer + * @param dvObject The dataset/datafile + * @throws XMLStreamException + */ + private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + List otherIdentifiers = new ArrayList(); + Set altPids = dvObject.getAlternativePersistentIndentifiers(); + + boolean alternatesWritten = false; + + Map attributes = new HashMap(); + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (DatasetFieldConstant.otherId.equals(dsf.getDatasetFieldType().getName())) { + otherIdentifiers = dsf.getDatasetFieldCompoundValues(); + break; + } + } + } + if (!altPids.isEmpty()) { + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); + } + for (AlternativePersistentIdentifier altPid : altPids) { + String identifierType = null; + String identifier = null; + switch (altPid.getProtocol()) { + case AbstractDOIProvider.DOI_PROTOCOL: + identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + case HandlePidProvider.HDL_PROTOCOL: + identifierType = "Handle"; + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + default: + // The AlternativePersistentIdentifier class isn't really ready for anything but + // doi or handle pids, but will add this as a default. + identifierType = ":unav"; + identifier = altPid.getAuthority() + altPid.getIdentifier(); + break; + } + attributes.put("alternativeIdentifierType", identifierType); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + + } + for (DatasetFieldCompoundValue otherIdentifier : otherIdentifiers) { + String identifierType = null; + String identifier = null; + for (DatasetField subField : otherIdentifier.getChildDatasetFields()) { + identifierType = ":unav"; + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.otherIdAgency: + identifierType = subField.getValue(); + break; + case DatasetFieldConstant.otherIdValue: + identifier = subField.getValue(); + break; + } + } + attributes.put("alternativeIdentifierType", identifierType); + if (!StringUtils.isBlank(identifier)) { + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); + + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + } + } + if (alternatesWritten) { + xmlw.writeEndElement(); + } + } + private String generateRelatedIdentifiers(DvObject dvObject) { StringBuilder sb = new StringBuilder(); From d69bf414f3cc3b5fc8e0214b0c5c4fc6f7ec155e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 8 May 2024 13:19:56 -0400 Subject: [PATCH 009/105] more methods --- .../pidproviders/doi/XmlMetadataTemplate.java | 313 +++++++++++++----- 1 file changed, 223 insertions(+), 90 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 92bf7afd273..eb2465257a1 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -47,6 +47,7 @@ import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; import edu.harvard.iq.dataverse.export.DDIExporter; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; @@ -146,7 +147,6 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM } - /** * 3, Title(s) (with optional type sub-properties) (M) * @@ -802,132 +802,265 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) } } - private String generateRelatedIdentifiers(DvObject dvObject) { + /** + * 12, RelatedIdentifier (with type and relation type sub-properties) (R) + * + * @param xmlw The Steam writer + * @param dvObject the dataset/datafile + * @throws XMLStreamException + */ + private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + + boolean relatedIdentifiersWritten = false; + + Map attributes = new HashMap(); - StringBuilder sb = new StringBuilder(); - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; + if (dvObject instanceof Dataset dataset) { List relatedPublications = dataset.getLatestVersionForCopy().getRelatedPublications(); if (!relatedPublications.isEmpty()) { for (DatasetRelPublication relatedPub : relatedPublications) { + attributes.clear(); + String pubIdType = relatedPub.getIdType(); String identifier = relatedPub.getIdNumber(); + String url = relatedPub.getUrl(); /* * Note - with identifier and url fields, it's not clear that there's a single - * way those two fields are used for all identifier types In QDR, at this time, - * doi and isbn types always have the raw number in the identifier field, - * whereas there are examples where URLs are in the identifier or url fields. - * The code here addresses those practices and is not generic. + * way those two fields are used for all identifier types. The code here is + * ~best effort to interpret those fields. */ - if (pubIdType != null) { - switch (pubIdType) { - case "doi": - if (identifier != null && identifier.length() != 0) { - appendIdentifier(sb, "DOI", "IsSupplementTo", "doi:" + identifier); - } - break; - case "isbn": - if (identifier != null && identifier.length() != 0) { - appendIdentifier(sb, "ISBN", "IsSupplementTo", "ISBN:" + identifier); - } - break; - case "url": - if (identifier != null && identifier.length() != 0) { - appendIdentifier(sb, "URL", "IsSupplementTo", identifier); - } else { - String pubUrl = relatedPub.getUrl(); - if (pubUrl != null && pubUrl.length() > 0) { - appendIdentifier(sb, "URL", "IsSupplementTo", pubUrl); - } - } - break; - default: - if (identifier != null && identifier.length() != 0) { - if (pubIdType.equalsIgnoreCase("arXiv")) { - pubIdType = "arXiv"; - } else if (pubIdType.equalsIgnoreCase("handle")) { - // Initial cap required for handle - pubIdType = "Handle"; - } else if (!pubIdType.equals("bibcode")) { - pubIdType = pubIdType.toUpperCase(); - } - // For all others, do a generic attempt to match the identifier type to the - // datacite schema and send the raw identifier as the value - appendIdentifier(sb, pubIdType, "IsSupplementTo", identifier); - } - break; + pubIdType = getCanonicalPublicationType(pubIdType); + + // Prefer url if set, otherwise check identifier + String relatedIdentifier = url; + if (StringUtils.isBlank(relatedIdentifier)) { + relatedIdentifier = identifier; + } + // For types where we understand the protocol, get the canonical form + switch (pubIdType) { + case "DOI": + if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { + relatedIdentifier = "doi:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; } + break; + case "Handle": + if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { + relatedIdentifier = "hdl:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; + } + break; + case "URL": + break; + default: + + // For non-URL types, if a URL is given, split the string to get a schemeUri + try { + URL relatedUrl = new URL(relatedIdentifier); + String protocol = relatedUrl.getProtocol(); + String authority = relatedUrl.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + relatedIdentifier = relatedIdentifier.substring(site.length()); + attributes.put("schemeURI", site); + } catch (MalformedURLException e) { + // Just an identifier + } + } - } else { - logger.info(relatedPub.getIdNumber() + relatedPub.getUrl() + relatedPub.getTitle()); + if (StringUtils.isNotBlank(relatedIdentifier)) { + // Still have a valid entry + attributes.put("relatedIdentifierType", pubIdType); + attributes.put("relationType", "IsSupplementTo"); + relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, relatedIdentifier); } } } - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { - - List datafileIdentifiers = new ArrayList<>(); + attributes.clear(); + attributes.put("relationType", "HasPart"); for (DataFile dataFile : dataset.getFiles()) { - if (dataFile.getGlobalId() != null) { - if (sb.toString().isEmpty()) { - sb.append(""); + GlobalId pid = dataFile.getGlobalId(); + if (pid != null) { + String pubIdType = getCanonicalPublicationType(pid.getProtocol()); + if (pubIdType != null) { + attributes.put("relatedIdentifierType", pubIdType); + relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier()); } - sb.append("" - + dataFile.getGlobalId() + ""); } } - - if (!sb.toString().isEmpty()) { - sb.append(""); - } } - } else if (dvObject.isInstanceofDataFile()) { - DataFile df = (DataFile) dvObject; - appendIdentifier(sb, "DOI", "IsPartOf", df.getOwner().getGlobalId().asString()); - if (sb.length() != 0) { - // Should always be true - sb.append(""); + } else if (dvObject instanceof DataFile df) { + GlobalId pid = df.getOwner().getGlobalId(); + if (pid != null) { + String pubIdType = getCanonicalPublicationType(pid.getProtocol()); + if (pubIdType != null) { + + attributes.clear(); + attributes.put("relationType", "IsPartOf"); + relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier()); + } } } - return sb.toString(); + if (relatedIdentifiersWritten) { + xmlw.writeEndElement(); + } } - private void appendIdentifier(StringBuilder sb, String idType, String relationType, String identifier) { - if (sb.toString().isEmpty()) { - sb.append(""); + static HashMap relatedIdentifierTypeMap = new HashMap(); + + private static String getCanonicalPublicationType(String pubIdType) { + if (relatedIdentifierTypeMap.isEmpty()) { + relatedIdentifierTypeMap.put("ARK".toLowerCase(), "ARK"); + relatedIdentifierTypeMap.put("arXiv", "arXiv"); + relatedIdentifierTypeMap.put("bibcode".toLowerCase(), "bibcode"); + relatedIdentifierTypeMap.put("DOI".toLowerCase(), "DOI"); + relatedIdentifierTypeMap.put("EAN13".toLowerCase(), "EAN13"); + relatedIdentifierTypeMap.put("EISSN".toLowerCase(), "EISSN"); + relatedIdentifierTypeMap.put("Handle".toLowerCase(), "Handle"); + relatedIdentifierTypeMap.put("IGSN".toLowerCase(), "IGSN"); + relatedIdentifierTypeMap.put("ISBN".toLowerCase(), "ISBN"); + relatedIdentifierTypeMap.put("ISSN".toLowerCase(), "ISSN"); + relatedIdentifierTypeMap.put("ISTC".toLowerCase(), "ISTC"); + relatedIdentifierTypeMap.put("LISSN".toLowerCase(), "LISSN"); + relatedIdentifierTypeMap.put("LSID".toLowerCase(), "LSID"); + relatedIdentifierTypeMap.put("PISSN".toLowerCase(), "PISSN"); + relatedIdentifierTypeMap.put("PMID".toLowerCase(), "PMID"); + relatedIdentifierTypeMap.put("PURL".toLowerCase(), "PURL"); + relatedIdentifierTypeMap.put("UPC".toLowerCase(), "UPC"); + relatedIdentifierTypeMap.put("URL".toLowerCase(), "URL"); + relatedIdentifierTypeMap.put("URN".toLowerCase(), "URN"); + relatedIdentifierTypeMap.put("WOS".toLowerCase(), "WOS"); + // Add entry for Handle protocol so this can be used with GlobalId/getProtocol() + relatedIdentifierTypeMap.put("hdl".toLowerCase(), "Handle"); } - sb.append("" + identifier + ""); + return relatedIdentifierTypeMap.get(pubIdType); } - public void generateFileIdentifiers(DvObject dvObject) { + private void writeSize(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // sizes -> size + boolean sizesWritten = false; + List dataFiles = new ArrayList(); - if (dvObject.isInstanceofDataset()) { - Dataset dataset = (Dataset) dvObject; + if (dvObject instanceof Dataset dataset) { + dataFiles = dataset.getFiles(); + } else if (dvObject instanceof DataFile df) { + dataFiles.add(df); + } + if (dataFiles != null && !dataFiles.isEmpty()) { + for (DataFile dataFile : dataFiles) { + Long size = dataFile.getFilesize(); + if (size != -1) { + sizesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "sizes", sizesWritten); + XmlWriterUtil.writeFullElement(xmlw, "size", size.toString()); + } + } + } + if (sizesWritten) { + xmlw.writeEndElement(); + } - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + } - List datafileIdentifiers = new ArrayList<>(); - for (DataFile dataFile : dataset.getFiles()) { - datafileIdentifiers.add(dataFile.getIdentifier()); - // int x = xmlMetadata.indexOf("") - 1; - // xmlMetadata = xmlMetadata.replace("{relatedIdentifier}", - // dataFile.getIdentifier()); - // xmlMetadata = xmlMetadata.substring(0, x) + "${relatedIdentifier}" - // + template.substring(x, template.length() - 1); + private void writeFormats(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + boolean formatsWritten = false; + List dataFiles = new ArrayList(); + + if (dvObject instanceof Dataset dataset) { + dataFiles = dataset.getFiles(); + } else if (dvObject instanceof DataFile df) { + dataFiles.add(df); + } + if (dataFiles != null && !dataFiles.isEmpty()) { + for (DataFile dataFile : dataFiles) { + String format = dataFile.getContentType(); + if (StringUtils.isNotBlank(format)) { + formatsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "formats", formatsWritten); + XmlWriterUtil.writeFullElement(xmlw, "format", format); } + /* Should original formats be sent? What about original sizes above? + if(dataFile.isTabularData()) { + String originalFormat = dataFile.getOriginalFileFormat(); + if(StringUtils.isNotBlank(originalFormat)) { + XmlWriterUtil.writeFullElement(xmlw, "format", format); + } + }*/ + } + } + if (formatsWritten) { + xmlw.writeEndElement(); + } + + } - } else { - // xmlMetadata = xmlMetadata.replace( - // "${relatedIdentifier}", - // ""); + private void writeVersion(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + Dataset d = null; + if(dvObject instanceof Dataset) { + d = (Dataset) dvObject; + } else if (dvObject instanceof DataFile) { + d = ((DataFile) dvObject).getOwner(); + } + if(d !=null) { + DatasetVersion dv = d.getLatestVersionForCopy(); + String version = dv.getFriendlyVersionNumber(); + if (StringUtils.isNotBlank(version)) { + XmlWriterUtil.writeFullElement(xmlw, "version", version); } } + } + private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) { + // rightsList -> rights with rightsURI attribute + xmlw.writeStartElement("rightsList"); // + + // set terms from the info:eu-repo-Access-Terms vocabulary + writeRightsHeader(xmlw, language); + boolean restrict = false; + boolean closed = false; + + if (datasetVersionDTO.isFileAccessRequest()) { + restrict = true; + } + if (datasetVersionDTO.getFiles() != null) { + for (int i = 0; i < datasetVersionDTO.getFiles().size(); i++) { + if (datasetVersionDTO.getFiles().get(i).isRestricted()) { + closed = true; + break; + } + } + } + + if (restrict && closed) { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/restrictedAccess"); + } else if (!restrict && closed) { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/closedAccess"); + } else { + xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/openAccess"); + } + xmlw.writeEndElement(); // + + writeRightsHeader(xmlw, language); + if (datasetVersionDTO.getLicense() != null) { + xmlw.writeAttribute("rightsURI", datasetVersionDTO.getLicense().getUri()); + xmlw.writeCharacters(datasetVersionDTO.getLicense().getName()); + } + xmlw.writeEndElement(); // + xmlw.writeEndElement(); // + } } \ No newline at end of file From 04b367f641fe8e8da77c8eceafd7a012985f9a1f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 15 May 2024 10:41:07 -0400 Subject: [PATCH 010/105] only one field to look for --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 943693355a3..d723cf3d528 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1344,6 +1344,7 @@ public List getGeographicCoverage() { } } + break; } return geoCoverages; } From 003431dde79bc7b80077c1aa6d0998329e85f4e3 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 15 May 2024 10:41:30 -0400 Subject: [PATCH 011/105] use common util method --- .../edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index 0c861cb6c09..c0e3057696a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -1715,12 +1715,12 @@ private static void createVarDDI(XMLStreamWriter xmlw, JsonObject dvar, String f // range.getBeginValueType().getName().equals(DB_VAR_RANGE_TYPE_POINT)) { if (range.getBoolean("hasBeginValueType") && range.getBoolean("isBeginValueTypePoint")) { if (range.containsKey("beginValue")) { - invalrngAdded = XmlWriterUtil.checkParentElement(xmlw, "invalrng", invalrngAdded); + invalrngAdded = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "invalrng", invalrngAdded); xmlw.writeEmptyElement("item"); XmlWriterUtil.writeAttribute(xmlw, "VALUE", range.getString("beginValue")); } } else { - invalrngAdded = XmlWriterUtil.checkParentElement(xmlw, "invalrng", invalrngAdded); + invalrngAdded = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "invalrng", invalrngAdded); xmlw.writeEmptyElement("range"); if (range.getBoolean("hasBeginValueType") && range.containsKey("beginValue")) { if (range.getBoolean("isBeginValueTypeMin")) { From fea2f5e01d9a9e3d37f1714e3aaba8dc32f84ab0 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Wed, 15 May 2024 10:42:29 -0400 Subject: [PATCH 012/105] access rights descriptions, geolocations, funding refs --- .../pidproviders/doi/XmlMetadataTemplate.java | 297 +++++++++++++++--- 1 file changed, 249 insertions(+), 48 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index eb2465257a1..be55b7a4837 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -41,16 +41,20 @@ import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.api.Util; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; import edu.harvard.iq.dataverse.export.DDIExporter; +import edu.harvard.iq.dataverse.license.License; import edu.harvard.iq.dataverse.pidproviders.AbstractPidProvider; import edu.harvard.iq.dataverse.pidproviders.PidUtil; import edu.harvard.iq.dataverse.pidproviders.doi.AbstractDOIProvider; import edu.harvard.iq.dataverse.pidproviders.handle.HandlePidProvider; import edu.harvard.iq.dataverse.pidproviders.perma.PermaLinkPidProvider; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.PersonOrOrgUtil; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.xml.XmlPrinter; @@ -117,34 +121,6 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM writeDescriptions(xmlw, dvObject); writeGeoLocations(xmlw, dvObject); writeFundingReferences(xmlw, dvObject); - - StringBuilder contributorsElement = new StringBuilder(); - if (doiMetadata.getContacts() != null) { - for (String[] contact : doiMetadata.getContacts()) { - if (!contact[0].isEmpty()) { - contributorsElement.append("" - + StringEscapeUtils.escapeXml10(contact[0]) + ""); - if (!contact[1].isEmpty()) { - contributorsElement.append("" + StringEscapeUtils.escapeXml10(contact[1]) + ""); - } - contributorsElement.append(""); - } - } - } - - if (doiMetadata.getProducers() != null) { - for (String[] producer : doiMetadata.getProducers()) { - contributorsElement.append("" + StringEscapeUtils.escapeXml10(producer[0]) - + ""); - if (!producer[1].isEmpty()) { - contributorsElement.append("" + StringEscapeUtils.escapeXml10(producer[1]) + ""); - } - contributorsElement.append(""); - } - } - - String relIdentifiers = generateRelatedIdentifiers(dvObject); - } /** @@ -1025,42 +1001,267 @@ private void writeVersion(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStr } - private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) { + private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { // rightsList -> rights with rightsURI attribute xmlw.writeStartElement("rightsList"); // // set terms from the info:eu-repo-Access-Terms vocabulary - writeRightsHeader(xmlw, language); - boolean restrict = false; + xmlw.writeStartElement("rights"); // + DatasetVersion dv = null; boolean closed = false; + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + closed = dv.isHasRestrictedFile(); + } else if (dvObject instanceof DataFile df) { + dv = df.getOwner().getLatestVersionForCopy(); - if (datasetVersionDTO.isFileAccessRequest()) { - restrict = true; - } - if (datasetVersionDTO.getFiles() != null) { - for (int i = 0; i < datasetVersionDTO.getFiles().size(); i++) { - if (datasetVersionDTO.getFiles().get(i).isRestricted()) { - closed = true; - break; - } - } + closed = df.isRestricted(); } + TermsOfUseAndAccess terms = dv.getTermsOfUseAndAccess(); + boolean requestsAllowed = terms.isFileAccessRequest(); + License license = terms.getLicense(); - if (restrict && closed) { + if (requestsAllowed && closed) { xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/restrictedAccess"); - } else if (!restrict && closed) { + } else if (!requestsAllowed && closed) { xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/closedAccess"); } else { xmlw.writeAttribute("rightsURI", "info:eu-repo/semantics/openAccess"); } xmlw.writeEndElement(); // + xmlw.writeStartElement("rights"); // - writeRightsHeader(xmlw, language); - if (datasetVersionDTO.getLicense() != null) { - xmlw.writeAttribute("rightsURI", datasetVersionDTO.getLicense().getUri()); - xmlw.writeCharacters(datasetVersionDTO.getLicense().getName()); + if (license != null) { + xmlw.writeAttribute("rightsURI", license.getUri().toString()); + xmlw.writeCharacters(license.getName()); + } else { + xmlw.writeAttribute("rightsURI", DatasetUtil.getLicenseURI(dv)); + xmlw.writeCharacters(BundleUtil.getStringFromBundle("license.custom.description")); + ; } xmlw.writeEndElement(); // - xmlw.writeEndElement(); // + xmlw.writeEndElement(); // + } + + private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // descriptions -> description with descriptionType attribute + boolean descriptionsWritten = false; + List descriptions = null; + DatasetVersion dv = null; + + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + dv.getDescriptions(); + } else if (dvObject instanceof DataFile df) { + String description = df.getDescription(); + if (description != null) { + descriptions = new ArrayList(); + descriptions.add(description); + } + } + Map attributes = new HashMap(); + attributes.put("descriptionType", "Abstract"); + for (String description : descriptions) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, description); + ; + } + + if (dv != null) { + List dsfs = dv.getDatasetFields(); + + for (DatasetField dsf : dsfs) { + + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.software: + attributes.clear(); + attributes.put("descriptionType", "TechnicalInfo"); + List dsfcvs = dsf.getDatasetFieldCompoundValues(); + for (DatasetFieldCompoundValue dsfcv : dsfcvs) { + + String softwareName = null; + String softwareVersion = null; + List childDsfs = dsfcv.getChildDatasetFields(); + for (DatasetField childDsf : childDsfs) { + if (DatasetFieldConstant.softwareName.equals(childDsf.getDatasetFieldType().getName())) { + softwareName = childDsf.getValue(); + } else if (DatasetFieldConstant.softwareVersion.equals(childDsf.getDatasetFieldType().getName())) { + softwareVersion = childDsf.getValue(); + } + } + if (StringUtils.isNotBlank(softwareName)) { + if (StringUtils.isNotBlank(softwareVersion)) { + } + softwareName = softwareName + ", " + softwareVersion; + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, softwareName); + } + } + break; + case DatasetFieldConstant.originOfSources: + case DatasetFieldConstant.characteristicOfSources: + case DatasetFieldConstant.accessToSources: + attributes.clear(); + attributes.put("descriptionType", "Methods"); + String method = dsf.getValue(); + if (StringUtils.isNotBlank(method)) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, method); + + } + break; + case DatasetFieldConstant.series: + attributes.clear(); + attributes.put("descriptionType", "SeriesInformation"); + dsfcvs = dsf.getDatasetFieldCompoundValues(); + for (DatasetFieldCompoundValue dsfcv : dsfcvs) { + List childDsfs = dsfcv.getChildDatasetFields(); + for (DatasetField childDsf : childDsfs) { + + if (DatasetFieldConstant.seriesInformation.equals(childDsf.getDatasetFieldType().getName())) { + String seriesInformation = childDsf.getValue(); + if (StringUtils.isNotBlank(seriesInformation)) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, seriesInformation); + } + break; + } + } + } + break; + case DatasetFieldConstant.notesText: + attributes.clear(); + attributes.put("descriptionType", "Other"); + String notesText = dsf.getValue(); + if (StringUtils.isNotBlank(notesText)) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, notesText); + } + break; + + } + } + + } + + if (descriptionsWritten) { + xmlw.writeEndElement(); // + } + } + + private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + if (dvObject instanceof Dataset d) { + boolean geoLocationsWritten = false; + DatasetVersion dv = d.getLatestVersionForCopy(); + + List places = dv.getGeographicCoverage(); + if (places != null && !places.isEmpty()) { + // geoLocationPlace + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + + for (String[] place : places) { + ArrayList placeList = new ArrayList(); + for (String placePart : place) { + placeList.add(placePart); + } + XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", Strings.join(placeList, ", ")); + } + } + boolean boundingBoxFound = false; + boolean productionPlaceFound = false; + for (DatasetField dsf : dv.getDatasetFields()) { + switch (dsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.geographicBoundingBox: + boundingBoxFound = true; + for (DatasetFieldCompoundValue dsfcv : dsf.getDatasetFieldCompoundValues()) { + List childDsfs = dsfcv.getChildDatasetFields(); + String nLatitude = null; + String sLatitude = null; + String eLongitude = null; + String wLongitude = null; + for (DatasetField childDsf : childDsfs) { + switch (childDsf.getDatasetFieldType().getName()) { + case DatasetFieldConstant.northLatitude: + nLatitude = childDsf.getValue(); + break; + case DatasetFieldConstant.southLatitude: + sLatitude = childDsf.getValue(); + break; + case DatasetFieldConstant.eastLongitude: + eLongitude = childDsf.getValue(); + break; + case DatasetFieldConstant.westLongitude: + wLongitude = childDsf.getValue(); + + } + } + if (StringUtils.isNoneBlank(wLongitude, eLongitude, nLatitude, sLatitude)) { + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + if (wLongitude.equals(eLongitude) && nLatitude.equals(sLatitude)) { + // A point + xmlw.writeStartElement("geoLocationPoint"); + XmlWriterUtil.writeFullElement(xmlw, "pointLongitude", eLongitude); + XmlWriterUtil.writeFullElement(xmlw, "pointLatitude", sLatitude); + xmlw.writeEndElement(); + } else { + // A box + xmlw.writeStartElement("geoLocationBox"); + XmlWriterUtil.writeFullElement(xmlw, "westBoundLongitude", wLongitude); + XmlWriterUtil.writeFullElement(xmlw, "eastBoundLongitude", eLongitude); + XmlWriterUtil.writeFullElement(xmlw, "southBoundLatitude", sLatitude); + XmlWriterUtil.writeFullElement(xmlw, "northBoundLatitude", nLatitude); + xmlw.writeEndElement(); + + } + } + } + case DatasetFieldConstant.productionPlace: + productionPlaceFound = true; + // geoLocationPlace + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + List prodPlaces = dsf.getValues(); + for (String prodPlace : prodPlaces) { + XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", prodPlace); + } + break; + } + if (boundingBoxFound && productionPlaceFound) { + break; + } + } + if (geoLocationsWritten) { + xmlw.writeEndElement(); // + } + } + + } + + + private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + // fundingReferences -> fundingReference -> funderName, awardNumber + boolean fundingReferenceWritten = false; + DatasetVersion dv = null; + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + } else if (dvObject instanceof DataFile df) { + dv = df.getOwner().getLatestVersionForCopy(); + } + if (dv != null) { + List funders = dv.getFunders(); + if (!funders.isEmpty()) { + + for (String funder : funders) { + if (!StringUtils.isBlank(funder)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", funder); + xmlw.writeEndElement(); // + } + } + if (fundingReferenceWritten) { + xmlw.writeEndElement(); // + } + } + } } } \ No newline at end of file From 3c52b6a2031a55c8840948681930a1824b02820b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 17 May 2024 15:36:18 -0400 Subject: [PATCH 013/105] altTitles npe --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index be55b7a4837..a2c744be2ed 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -138,7 +138,7 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language) throws XMLStreamException { String title = doiMetadata.getTitle(); String subTitle = null; - List altTitles = null; + List altTitles = new ArrayList<>(); // Only Datasets can have a subtitle or alternative titles if (dvObject instanceof Dataset d) { DatasetVersion dv = d.getLatestVersion(); From bab2a0d270b766916dfd9578fd25ab05332f0958 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Sat, 18 May 2024 12:25:07 -0400 Subject: [PATCH 014/105] fixes and test --- .../pidproviders/doi/XmlMetadataTemplate.java | 64 ++++++----- .../doi/datacite/XmlMetadataTemplateTest.java | 108 ++++++++++++++++++ 2 files changed, 144 insertions(+), 28 deletions(-) create mode 100644 src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a2c744be2ed..e9b7b0faa26 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -21,6 +21,7 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; + import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -84,6 +85,7 @@ public String generateXML(DvObject dvObject) { generateXML(dvObject, outputStream); String xml = outputStream.toString(); + logger.info(xml); return XmlPrinter.prettyPrintXml(xml); } catch (XMLStreamException | IOException e) { logger.severe("Unable to generate DataCite XML for DOI: " + dvObject.getGlobalId().asString() + " : " + e.getMessage()); @@ -98,10 +100,11 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM String metadataLanguage = null; // when set, otherwise = language? XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); xmlw.writeStartElement("resource"); + xmlw.writeDefaultNamespace(XML_NAMESPACE); xmlw.writeAttribute("xmlns:xsi", XML_XSI); xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); - + writeIdentifier(xmlw, dvObject); writeCreators(xmlw, doiMetadata.getAuthors()); writeTitles(xmlw, dvObject, language); @@ -121,6 +124,8 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM writeDescriptions(xmlw, dvObject); writeGeoLocations(xmlw, dvObject); writeFundingReferences(xmlw, dvObject); + xmlw.writeEndElement(); + xmlw.flush(); } /** @@ -726,32 +731,34 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) } } } - if (!altPids.isEmpty()) { + + if (altPids != null && !altPids.isEmpty()) { alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); - } - for (AlternativePersistentIdentifier altPid : altPids) { - String identifierType = null; - String identifier = null; - switch (altPid.getProtocol()) { - case AbstractDOIProvider.DOI_PROTOCOL: - identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); - identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); - break; - case HandlePidProvider.HDL_PROTOCOL: - identifierType = "Handle"; - identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); - break; - default: - // The AlternativePersistentIdentifier class isn't really ready for anything but - // doi or handle pids, but will add this as a default. - identifierType = ":unav"; - identifier = altPid.getAuthority() + altPid.getIdentifier(); - break; - } - attributes.put("alternativeIdentifierType", identifierType); - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + for (AlternativePersistentIdentifier altPid : altPids) { + String identifierType = null; + String identifier = null; + switch (altPid.getProtocol()) { + case AbstractDOIProvider.DOI_PROTOCOL: + identifierType = AbstractDOIProvider.DOI_PROTOCOL.toUpperCase(); + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + case HandlePidProvider.HDL_PROTOCOL: + identifierType = "Handle"; + identifier = altPid.getAuthority() + "/" + altPid.getIdentifier(); + break; + default: + // The AlternativePersistentIdentifier class isn't really ready for anything but + // doi or handle pids, but will add this as a default. + identifierType = ":unav"; + identifier = altPid.getAuthority() + altPid.getIdentifier(); + break; + } + attributes.put("alternativeIdentifierType", identifierType); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); + } } + for (DatasetFieldCompoundValue otherIdentifier : otherIdentifiers) { String identifierType = null; String identifier = null; @@ -1061,10 +1068,11 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws X } Map attributes = new HashMap(); attributes.put("descriptionType", "Abstract"); - for (String description : descriptions) { - descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, description); - ; + if (descriptions != null) { + for (String description : descriptions) { + descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, description); + } } if (dv != null) { diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java new file mode 100644 index 00000000000..e576398a474 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java @@ -0,0 +1,108 @@ +package edu.harvard.iq.dataverse.pidproviders.doi.datacite; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetField; +import edu.harvard.iq.dataverse.DatasetFieldConstant; +import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetVersion; +import edu.harvard.iq.dataverse.DatasetVersion.VersionState; +import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.GlobalId; +import edu.harvard.iq.dataverse.TermsOfUseAndAccess; +import edu.harvard.iq.dataverse.branding.BrandingUtil; +import edu.harvard.iq.dataverse.pidproviders.PidProviderFactoryBean; +import edu.harvard.iq.dataverse.pidproviders.PidUtil; +import edu.harvard.iq.dataverse.pidproviders.doi.DoiMetadata; +import edu.harvard.iq.dataverse.pidproviders.doi.XmlMetadataTemplate; +import edu.harvard.iq.dataverse.settings.JvmSettings; +import edu.harvard.iq.dataverse.settings.SettingsServiceBean; +import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.testing.JvmSetting; +import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.extension.ExtendWith; + +import org.mockito.Mockito; +import org.mockito.junit.jupiter.MockitoExtension; + +import static org.junit.jupiter.api.Assertions.*; +import static org.mockito.ArgumentMatchers.any; + +@ExtendWith(MockitoExtension.class) +@LocalJvmSettings +@JvmSetting(key = JvmSettings.SITE_URL, value = "https://example.com") + +public class XmlMetadataTemplateTest { + + static DataverseServiceBean dataverseSvc; + static SettingsServiceBean settingsSvc; + static PidProviderFactoryBean pidService; + static final String DEFAULT_NAME = "LibraScholar"; + + @BeforeAll + public static void setupMocks() { + dataverseSvc = Mockito.mock(DataverseServiceBean.class); + settingsSvc = Mockito.mock(SettingsServiceBean.class); + BrandingUtil.injectServices(dataverseSvc, settingsSvc); + + // initial values (needed here for other tests where this method is reused!) + Mockito.when(settingsSvc.getValueForKey(SettingsServiceBean.Key.InstallationName)).thenReturn(DEFAULT_NAME); + Mockito.when(dataverseSvc.getRootDataverseName()).thenReturn(DEFAULT_NAME); + + pidService = Mockito.mock(PidProviderFactoryBean.class); + Mockito.when(pidService.isGlobalIdLocallyUnique(any(GlobalId.class))).thenReturn(true); + Mockito.when(pidService.getProducer()).thenReturn("RootDataverse"); + + } + + /** + */ + @Test + public void testDataCiteXMLCreation() throws IOException { + DoiMetadata doiMetadata = new DoiMetadata(); + doiMetadata.setTitle("A Title"); + List creators = new ArrayList(); + creators.add("Alice"); + creators.add("Bob"); + doiMetadata.setCreators(creators); + doiMetadata.setPublisher("Dataverse"); + XmlMetadataTemplate template = new XmlMetadataTemplate(doiMetadata); + + Dataset d = new Dataset(); + GlobalId doi = new GlobalId("doi", "10.5072", "FK2/ABCDEF", null, null, null); + d.setGlobalId(doi); + DatasetVersion dv = new DatasetVersion(); + TermsOfUseAndAccess toa = new TermsOfUseAndAccess(); + toa.setTermsOfUse("Some terms"); + dv.setTermsOfUseAndAccess(toa); + dv.setDataset(d); + DatasetFieldType primitiveDSFType = new DatasetFieldType(DatasetFieldConstant.title, + DatasetFieldType.FieldType.TEXT, false); + DatasetField testDatasetField = new DatasetField(); + + dv.setVersionState(VersionState.DRAFT); + + testDatasetField.setDatasetVersion(dv); + testDatasetField.setDatasetFieldType(primitiveDSFType); + testDatasetField.setSingleValue("First Title"); + List fields = new ArrayList<>(); + fields.add(testDatasetField); + dv.setDatasetFields(fields); + ArrayList dsvs = new ArrayList<>(); + dsvs.add(0, dv); + d.setVersions(dsvs); + + String xml = template.generateXML(d); + System.out.println("Output is " + xml); + + } + +} From 3cca63d2f6ff4052852876d9ccfe52424d2da615 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 20 May 2024 14:55:46 -0400 Subject: [PATCH 015/105] fix for empty rel pub entry --- .../pidproviders/doi/XmlMetadataTemplate.java | 179 +++++++++--------- 1 file changed, 94 insertions(+), 85 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index e9b7b0faa26..8725feca546 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -21,7 +21,6 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; - import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -97,14 +96,14 @@ public String generateXML(DvObject dvObject) { private void generateXML(DvObject dvObject, OutputStream outputStream) throws XMLStreamException { // Could/should use dataset metadata language for metadata from DvObject itself? String language = null; // machine locale? e.g. for Publisher which is global - String metadataLanguage = null; // when set, otherwise = language? + String metadataLanguage = null; // when set, otherwise = language? XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); xmlw.writeStartElement("resource"); - + xmlw.writeDefaultNamespace(XML_NAMESPACE); xmlw.writeAttribute("xmlns:xsi", XML_XSI); xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); - + writeIdentifier(xmlw, dvObject); writeCreators(xmlw, doiMetadata.getAuthors()); writeTitles(xmlw, dvObject, language); @@ -242,7 +241,7 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList) String nameIdentifierScheme = null; if (StringUtils.isNotBlank(author.getIdValue()) && StringUtils.isNotBlank(author.getIdType())) { nameIdentifier = author.getIdValue(); - if(nameIdentifier != null) { + if (nameIdentifier != null) { // Normalizes to the URL form of the identifier, returns null if the identifier // is not valid given the type nameIdentifier = author.getIdentifierAsUrl(); @@ -392,14 +391,16 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt } /** - * 7, Contributor (with optional given name, family name, name identifier - * and affiliation sub-properties) + * 7, Contributor (with optional given name, family name, name identifier and + * affiliation sub-properties) * * @see #writeContributorElement(javax.xml.stream.XMLStreamWriter, - * java.lang.String, java.lang.String, java.lang.String) + * java.lang.String, java.lang.String, java.lang.String) * - * @param xmlw The stream writer - * @param dvObject The Dataset/DataFile + * @param xmlw + * The stream writer + * @param dvObject + * The Dataset/DataFile * @throws XMLStreamException */ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -410,11 +411,11 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X List compoundContributors = new ArrayList(); // Dataset Subject= Dataverse subject, keyword, and/or topic classification // fields - //ToDo Include for files? - /*if(dvObject instanceof DataFile df) { - dvObject = df.getOwner(); - }*/ - + // ToDo Include for files? + /* + * if(dvObject instanceof DataFile df) { dvObject = df.getOwner(); } + */ + if (dvObject instanceof Dataset d) { DatasetVersion dv = d.getLatestVersionForCopy(); for (DatasetField dsf : dv.getDatasetFields()) { @@ -433,8 +434,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X } } } - - + for (DatasetFieldCompoundValue producerFieldValue : compoundProducers) { String producer = null; String affiliation = null; @@ -457,7 +457,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X } } - + for (DatasetFieldCompoundValue distributorFieldValue : compoundDistributors) { String distributor = null; String affiliation = null; @@ -517,7 +517,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X break; } } - // QDR - doesn't have Funder in the contributor type list. + // QDR - doesn't have Funder in the contributor type list. // Using a string isn't i18n if (StringUtils.isNotBlank(contributor) && !StringUtils.equalsIgnoreCase("Funder", contributorType)) { contributorsCreated = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "contributors", contributorsCreated); @@ -526,7 +526,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X } } - + if (contributorsCreated) { xmlw.writeEndElement(); } @@ -568,7 +568,7 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin logger.warning("DatasetAuthor.getIdentifierAsUrl returned a Malformed URL: " + nameIdentifier); } } - + if (StringUtils.isNotBlank(affiliation)) { attributeMap.clear(); if (affiliation.startsWith("https://ror.org/")) { @@ -584,8 +584,10 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin /** * 8, Date (with type sub-property) (R) * - * @param xmlw The Steam writer - * @param dvObject The dataset/datafile + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset/datafile * @throws XMLStreamException */ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -668,15 +670,16 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } } - // 9, Language (MA), language private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { - //Currently not supported. Spec indicates one 'primary' language. Could send the first entry in DatasetFieldConstant.language or send iff there is only one entry, and/or default to the machine's default lang? + // Currently not supported. Spec indicates one 'primary' language. Could send + // the first entry in DatasetFieldConstant.language or send iff there is only + // one entry, and/or default to the machine's default lang? return; } - - // 10, ResourceType (with mandatory general type - // description sub- property) (M) + + // 10, ResourceType (with mandatory general type + // description sub- property) (M) private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { List kindOfDataValues = new ArrayList(); Map attributes = new HashMap(); @@ -711,8 +714,10 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X /** * 11 AlternateIdentifier (with type sub-property) (O) * - * @param xmlw The Steam writer - * @param dvObject The dataset/datafile + * @param xmlw + * The Steam writer + * @param dvObject + * The dataset/datafile * @throws XMLStreamException */ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -731,7 +736,7 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) } } } - + if (altPids != null && !altPids.isEmpty()) { alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); for (AlternativePersistentIdentifier altPid : altPids) { @@ -788,8 +793,10 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) /** * 12, RelatedIdentifier (with type and relation type sub-properties) (R) * - * @param xmlw The Steam writer - * @param dvObject the dataset/datafile + * @param xmlw + * The Steam writer + * @param dvObject + * the dataset/datafile * @throws XMLStreamException */ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -813,6 +820,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th * way those two fields are used for all identifier types. The code here is * ~best effort to interpret those fields. */ + pubIdType = getCanonicalPublicationType(pubIdType); // Prefer url if set, otherwise check identifier @@ -821,49 +829,52 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th relatedIdentifier = identifier; } // For types where we understand the protocol, get the canonical form - switch (pubIdType) { - case "DOI": - if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { - relatedIdentifier = "doi:" + relatedIdentifier; - } - try { - GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); - relatedIdentifier = pid.asRawIdentifier(); - } catch (IllegalArgumentException e) { - relatedIdentifier = null; - } - break; - case "Handle": - if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { - relatedIdentifier = "hdl:" + relatedIdentifier; - } - try { - GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); - relatedIdentifier = pid.asRawIdentifier(); - } catch (IllegalArgumentException e) { - relatedIdentifier = null; - } - break; - case "URL": - break; - default: - - // For non-URL types, if a URL is given, split the string to get a schemeUri - try { - URL relatedUrl = new URL(relatedIdentifier); - String protocol = relatedUrl.getProtocol(); - String authority = relatedUrl.getAuthority(); - String site = String.format("%s://%s", protocol, authority); - relatedIdentifier = relatedIdentifier.substring(site.length()); - attributes.put("schemeURI", site); - } catch (MalformedURLException e) { - // Just an identifier + if (pubIdType != null) { + switch (pubIdType) { + case "DOI": + if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { + relatedIdentifier = "doi:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; + } + break; + case "Handle": + if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { + relatedIdentifier = "hdl:" + relatedIdentifier; + } + try { + GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); + relatedIdentifier = pid.asRawIdentifier(); + } catch (IllegalArgumentException e) { + relatedIdentifier = null; + } + break; + case "URL": + break; + default: + + // For non-URL types, if a URL is given, split the string to get a schemeUri + try { + URL relatedUrl = new URL(relatedIdentifier); + String protocol = relatedUrl.getProtocol(); + String authority = relatedUrl.getAuthority(); + String site = String.format("%s://%s", protocol, authority); + relatedIdentifier = relatedIdentifier.substring(site.length()); + attributes.put("schemeURI", site); + } catch (MalformedURLException e) { + // Just an identifier + } } } - if (StringUtils.isNotBlank(relatedIdentifier)) { // Still have a valid entry - attributes.put("relatedIdentifierType", pubIdType); + if (pubIdType != null) { + attributes.put("relatedIdentifierType", pubIdType); + } attributes.put("relationType", "IsSupplementTo"); relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, relatedIdentifier); @@ -905,7 +916,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th static HashMap relatedIdentifierTypeMap = new HashMap(); - + private static String getCanonicalPublicationType(String pubIdType) { if (relatedIdentifierTypeMap.isEmpty()) { relatedIdentifierTypeMap.put("ARK".toLowerCase(), "ARK"); @@ -976,13 +987,12 @@ private void writeFormats(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStr formatsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "formats", formatsWritten); XmlWriterUtil.writeFullElement(xmlw, "format", format); } - /* Should original formats be sent? What about original sizes above? - if(dataFile.isTabularData()) { - String originalFormat = dataFile.getOriginalFileFormat(); - if(StringUtils.isNotBlank(originalFormat)) { - XmlWriterUtil.writeFullElement(xmlw, "format", format); - } - }*/ + /* + * Should original formats be sent? What about original sizes above? + * if(dataFile.isTabularData()) { String originalFormat = + * dataFile.getOriginalFileFormat(); if(StringUtils.isNotBlank(originalFormat)) + * { XmlWriterUtil.writeFullElement(xmlw, "format", format); } } + */ } } if (formatsWritten) { @@ -993,19 +1003,19 @@ private void writeFormats(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStr private void writeVersion(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { Dataset d = null; - if(dvObject instanceof Dataset) { + if (dvObject instanceof Dataset) { d = (Dataset) dvObject; } else if (dvObject instanceof DataFile) { d = ((DataFile) dvObject).getOwner(); } - if(d !=null) { + if (d != null) { DatasetVersion dv = d.getLatestVersionForCopy(); - String version = dv.getFriendlyVersionNumber(); + String version = dv.getFriendlyVersionNumber(); if (StringUtils.isNotBlank(version)) { XmlWriterUtil.writeFullElement(xmlw, "version", version); } } - + } private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { @@ -1244,7 +1254,6 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X } - private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { // fundingReferences -> fundingReference -> funderName, awardNumber boolean fundingReferenceWritten = false; From 30c80a9a5a27c51d8ca8130375d358aadb447a4f Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 20 May 2024 15:32:13 -0400 Subject: [PATCH 016/105] bugs: remove bad nesting, dupe values --- .../pidproviders/doi/XmlMetadataTemplate.java | 51 +++++++++---------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 8725feca546..74da57094c4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -158,20 +158,22 @@ private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String languag if (StringUtils.isNotBlank(title) || StringUtils.isNotBlank(subTitle) || (altTitles != null && !String.join("", altTitles).isBlank())) { xmlw.writeStartElement("titles"); - XmlWriterUtil.writeFullElement(xmlw, "title", title, language); - + if (StringUtils.isNotBlank(title)) { + XmlWriterUtil.writeFullElement(xmlw, "title", title, language); + } Map attributes = new HashMap(); - attributes.put("titleType", "Subtitle"); - - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, title); - - attributes.clear(); - attributes.put("titleType", "AlternativeTitle"); - for (String altTitle : altTitles) { - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, altTitle); + if (StringUtils.isNotBlank(subTitle)) { + attributes.put("titleType", "Subtitle"); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, subTitle); + } + if ((altTitles != null && !String.join("", altTitles).isBlank())) { + attributes.clear(); + attributes.put("titleType", "AlternativeTitle"); + for (String altTitle : altTitles) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "title", attributes, altTitle); + } } - xmlw.writeEndElement(); } } @@ -250,12 +252,9 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList) } if (StringUtils.isNotBlank(creatorName)) { - xmlw.writeStartElement("creator"); // JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, false, StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid")); - writeEntityElements(xmlw, "creator", null, creatorObj, affiliation, nameIdentifier, nameIdentifierScheme); - xmlw.writeEndElement(); // } else { @@ -693,21 +692,21 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X kindOfDataValues = dsf.getControlledVocabularyValues(); break; } - - if (kindOfDataValues.isEmpty()) { - // Write an attribute only element if there are no kindOfData values. - xmlw.writeStartElement("resourceType"); - xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); - xmlw.writeEndElement(); - } else { - for (ControlledVocabularyValue kindOfDataValue : kindOfDataValues) { - String resourceType = kindOfDataValue.getStrValue(); - if (StringUtils.isNotBlank(resourceType)) { - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); - } + } + if (kindOfDataValues.isEmpty()) { + // Write an attribute only element if there are no kindOfData values. + xmlw.writeStartElement("resourceType"); + xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); + xmlw.writeEndElement(); + } else { + for (ControlledVocabularyValue kindOfDataValue : kindOfDataValues) { + String resourceType = kindOfDataValue.getStrValue(); + if (StringUtils.isNotBlank(resourceType)) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); } } } + } } From a2acdebbac758317b5d2d07fc1af01859f8bfa85 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 20 May 2024 16:54:02 -0400 Subject: [PATCH 017/105] add XML Validation to test --- .../iq/dataverse/util/xml/XmlValidator.java | 5 +++ .../doi/datacite/XmlMetadataTemplateTest.java | 45 +++++++++++++++++-- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java index 586ca50b6fd..cec64ab95b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/xml/XmlValidator.java @@ -24,7 +24,12 @@ public class XmlValidator { private static final Logger logger = Logger.getLogger(XmlValidator.class.getCanonicalName()); public static boolean validateXmlSchema(String fileToValidate, URL schemaToValidateAgainst) throws MalformedURLException, SAXException, IOException { + Source xmlFile = new StreamSource(new File(fileToValidate)); + return validateXmlSchema(xmlFile, schemaToValidateAgainst); + } + + public static boolean validateXmlSchema(Source xmlFile, URL schemaToValidateAgainst) throws MalformedURLException, SAXException, IOException { SchemaFactory schemaFactory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI); Schema schema = schemaFactory.newSchema(schemaToValidateAgainst); Validator validator = schema.newValidator(); diff --git a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java index e576398a474..c1bbc3bebc1 100644 --- a/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/pidproviders/doi/datacite/XmlMetadataTemplateTest.java @@ -1,9 +1,11 @@ package edu.harvard.iq.dataverse.pidproviders.doi.datacite; import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetAuthor; import edu.harvard.iq.dataverse.DatasetField; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldType; +import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DatasetVersion.VersionState; import edu.harvard.iq.dataverse.DataverseServiceBean; @@ -19,12 +21,17 @@ import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.testing.JvmSetting; import edu.harvard.iq.dataverse.util.testing.LocalJvmSettings; +import edu.harvard.iq.dataverse.util.xml.XmlValidator; import java.io.IOException; +import java.io.StringReader; +import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.Map; +import javax.xml.transform.stream.StreamSource; + import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.BeforeAll; @@ -32,6 +39,7 @@ import org.mockito.Mockito; import org.mockito.junit.jupiter.MockitoExtension; +import org.xml.sax.SAXException; import static org.junit.jupiter.api.Assertions.*; import static org.mockito.ArgumentMatchers.any; @@ -69,10 +77,32 @@ public static void setupMocks() { public void testDataCiteXMLCreation() throws IOException { DoiMetadata doiMetadata = new DoiMetadata(); doiMetadata.setTitle("A Title"); - List creators = new ArrayList(); - creators.add("Alice"); - creators.add("Bob"); - doiMetadata.setCreators(creators); + DatasetFieldType dft = new DatasetFieldType(DatasetFieldConstant.authorName, FieldType.TEXT, false); + dft.setDisplayFormat("#VALUE"); + DatasetFieldType dft2 = new DatasetFieldType(DatasetFieldConstant.authorAffiliation, FieldType.TEXT, false); + dft2.setDisplayFormat("#VALUE"); + DatasetAuthor alice = new DatasetAuthor(); + DatasetField df1 = new DatasetField(); + df1.setDatasetFieldType(dft); + df1.setSingleValue("Alice"); + alice.setName(df1); + DatasetField df2 = new DatasetField(); + df2.setDatasetFieldType(dft2); + df2.setSingleValue("Harvard University"); + alice.setAffiliation(df2); + DatasetAuthor bob = new DatasetAuthor(); + DatasetField df3 = new DatasetField(); + df3.setDatasetFieldType(dft); + df3.setSingleValue("Bob"); + bob.setName(df3); + DatasetField df4 = new DatasetField(); + df4.setDatasetFieldType(dft2); + df4.setSingleValue("QDR"); + bob.setAffiliation(df4); + List authors = new ArrayList<>(); + authors.add(alice); + authors.add(bob); + doiMetadata.setAuthors(authors); doiMetadata.setPublisher("Dataverse"); XmlMetadataTemplate template = new XmlMetadataTemplate(doiMetadata); @@ -102,6 +132,13 @@ public void testDataCiteXMLCreation() throws IOException { String xml = template.generateXML(d); System.out.println("Output is " + xml); + try { + StreamSource source = new StreamSource(new StringReader(xml)); + source.setSystemId("DataCite XML for test dataset"); + assertTrue(XmlValidator.validateXmlSchema(source, new URL("https://schema.datacite.org/meta/kernel-4/metadata.xsd"))); + } catch (SAXException e) { + System.out.println("Invalid schema: " + e.getMessage()); + } } From 3ec7a0b680ec5f04d650e830bb391c6be1f176f2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 23 May 2024 16:24:09 -0400 Subject: [PATCH 018/105] fix contributorType --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 74da57094c4..6e4d81d6248 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -535,7 +535,7 @@ private void writeEntityElements(XMLStreamWriter xmlw, String elementName, Strin xmlw.writeStartElement(elementName); Map attributeMap = new HashMap(); if (StringUtils.isNotBlank(type)) { - attributeMap.put("contributorType", type); + xmlw.writeAttribute("contributorType", type); } // person name=, if (entityObject.getBoolean("isPerson")) { From 842dee678530391264b2869ec71ab70258901189 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 23 May 2024 17:21:57 -0400 Subject: [PATCH 019/105] add geolocations element and multiple geolocation --- .../pidproviders/doi/XmlMetadataTemplate.java | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 6e4d81d6248..d0986616bb4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1174,15 +1174,18 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X List places = dv.getGeographicCoverage(); if (places != null && !places.isEmpty()) { // geoLocationPlace - geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); - + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); for (String[] place : places) { + xmlw.startElement("geoLocation"); // + ArrayList placeList = new ArrayList(); for (String placePart : place) { placeList.add(placePart); } XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", Strings.join(placeList, ", ")); + xmlw.endElement(); // } + } boolean boundingBoxFound = false; boolean productionPlaceFound = false; @@ -1213,7 +1216,8 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X } } if (StringUtils.isNoneBlank(wLongitude, eLongitude, nLatitude, sLatitude)) { - geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); + xmlw.startElement("geoLocation"); // if (wLongitude.equals(eLongitude) && nLatitude.equals(sLatitude)) { // A point xmlw.writeStartElement("geoLocationPoint"); @@ -1230,15 +1234,18 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X xmlw.writeEndElement(); } + xmlw.endElement(); // } } case DatasetFieldConstant.productionPlace: productionPlaceFound = true; // geoLocationPlace - geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocation", geoLocationsWritten); + geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); List prodPlaces = dsf.getValues(); for (String prodPlace : prodPlaces) { + xmlw.startElement("geoLocation"); // XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", prodPlace); + xmlw.endElement(); // } break; } From 81a7c4a946ee4e54ae91913c6de6857fb6a553ba Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Thu, 23 May 2024 17:29:19 -0400 Subject: [PATCH 020/105] typos --- .../pidproviders/doi/XmlMetadataTemplate.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index d0986616bb4..127a1930860 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1176,14 +1176,14 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X // geoLocationPlace geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); for (String[] place : places) { - xmlw.startElement("geoLocation"); // + xmlw.writeStartElement("geoLocation"); // ArrayList placeList = new ArrayList(); for (String placePart : place) { placeList.add(placePart); } XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", Strings.join(placeList, ", ")); - xmlw.endElement(); // + xmlw.writeEndElement(); // } } @@ -1217,7 +1217,7 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X } if (StringUtils.isNoneBlank(wLongitude, eLongitude, nLatitude, sLatitude)) { geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); - xmlw.startElement("geoLocation"); // + xmlw.writeStartElement("geoLocation"); // if (wLongitude.equals(eLongitude) && nLatitude.equals(sLatitude)) { // A point xmlw.writeStartElement("geoLocationPoint"); @@ -1234,7 +1234,7 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X xmlw.writeEndElement(); } - xmlw.endElement(); // + xmlw.writeEndElement(); // } } case DatasetFieldConstant.productionPlace: @@ -1243,9 +1243,9 @@ private void writeGeoLocations(XMLStreamWriter xmlw, DvObject dvObject) throws X geoLocationsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "geoLocations", geoLocationsWritten); List prodPlaces = dsf.getValues(); for (String prodPlace : prodPlaces) { - xmlw.startElement("geoLocation"); // + xmlw.writeStartElement("geoLocation"); // XmlWriterUtil.writeFullElement(xmlw, "geoLocationPlace", prodPlace); - xmlw.endElement(); // + xmlw.writeEndElement(); // } break; } From ed5eab0deb487ebfbb53157a40e2cf409d5f40ab Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 09:53:12 -0400 Subject: [PATCH 021/105] try execute inside the main method trying to avoid a separate tx boundary --- .../command/impl/CuratePublishedDatasetVersionCommand.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java index f83041d87bd..fbff40a9c80 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java @@ -167,9 +167,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { DeleteDatasetVersionCommand cmd; cmd = new DeleteDatasetVersionCommand(getRequest(), savedDataset); - ctxt.engine().submit(cmd); - // Running the command above reindexes the dataset, so we don't need to do it - // again in here. + cmd.execute(ctxt); // And update metadata at PID provider ctxt.engine().submit( From 39673f05e6d4394d3549c58a5d487a9c732113c2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 10:41:42 -0400 Subject: [PATCH 022/105] Fix subject, keyword --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 127a1930860..85e28670cfc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -305,8 +305,10 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt // fields if (dvObject instanceof Dataset d) { DatasetVersion dv = d.getLatestVersionForCopy(); - dv.getDatasetSubjects(); for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.subject)) { + subjects.addAll(dsf.getValues()); + } if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.keyword)) { compoundKeywords = dsf.getDatasetFieldCompoundValues(); } else if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.topicClassification)) { @@ -330,7 +332,7 @@ private void writeSubjects(XMLStreamWriter xmlw, DvObject dvObject) throws XMLSt for (DatasetField subField : keywordFieldValue.getChildDatasetFields()) { switch (subField.getDatasetFieldType().getName()) { - case DatasetFieldConstant.keyword: + case DatasetFieldConstant.keywordValue: keyword = subField.getValue(); break; case DatasetFieldConstant.keywordVocab: From 36097d61bbf0c92aab48db01ff02e1c23b86be1a Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 10:41:51 -0400 Subject: [PATCH 023/105] fix geo coverage --- src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index d723cf3d528..6648419216d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1342,9 +1342,8 @@ public List getGeographicCoverage() { } geoCoverages.add(coverageItem); } - + break; } - break; } return geoCoverages; } From a5d3b3e5a40b049176a6c3a205b1199c2117694e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 11:47:04 -0400 Subject: [PATCH 024/105] adjust funders to include grant number, add xml escaping for description --- .../pidproviders/doi/XmlMetadataTemplate.java | 70 +++++++++++++++---- 1 file changed, 58 insertions(+), 12 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 85e28670cfc..3b6a5cb2906 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1082,7 +1082,7 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws X if (descriptions != null) { for (String description : descriptions) { descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, description); + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "description", attributes, StringEscapeUtils.escapeXml10(description)); } } @@ -1272,21 +1272,67 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr dv = df.getOwner().getLatestVersionForCopy(); } if (dv != null) { - List funders = dv.getFunders(); - if (!funders.isEmpty()) { - - for (String funder : funders) { - if (!StringUtils.isBlank(funder)) { - fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); - xmlw.writeStartElement("fundingReference"); // - XmlWriterUtil.writeFullElement(xmlw, "funderName", funder); - xmlw.writeEndElement(); // + List retList = new ArrayList<>(); + for (DatasetField dsf : dv.getDatasetFields()) { + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributor)) { + boolean addFunder = false; + for (DatasetFieldCompoundValue contributorValue : dsf.getDatasetFieldCompoundValues()) { + String contributorName = null; + String contributorType = null; + for (DatasetField subField : contributorValue.getChildDatasetFields()) { + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributorName)) { + contributorName = subField.getDisplayValue(); + } + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.contributorType)) { + contributorType = subField.getRawValue(); + } + } + // SEK 02/12/2019 move outside loop to prevent contrib type to carry over to + // next contributor + // TODO: Consider how this will work in French, Chinese, etc. + if ("Funder".equals(contributorType)) { + if (!StringUtils.isBlank(contributorName)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", contributorName); + xmlw.writeEndElement(); // + } + } } } - if (fundingReferenceWritten) { - xmlw.writeEndElement(); // + if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumber)) { + for (DatasetFieldCompoundValue grantObject : dsf.getDatasetFieldCompoundValues()) { + String funder = null; + String awardNumber = null; + for (DatasetField subField : grantObject.getChildDatasetFields()) { + // It would be nice to do something with grantNumberValue (the actual number) + // but schema.org doesn't support it. + if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumberAgency)) { + String grantAgency = subField.getDisplayValue(); + funder = grantAgency; + } else if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.grantNumberValue)) { + String grantNumberValue = subField.getDisplayValue(); + awardNumber = grantNumberValue; + } + } + if (!StringUtils.isBlank(funder)) { + fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); + xmlw.writeStartElement("fundingReference"); // + XmlWriterUtil.writeFullElement(xmlw, "funderName", funder); + if (StringUtils.isNotBlank(awardNumber)) { + writeFullElement(xmlw, null, "awardNumber", null, awardNumber); + } + xmlw.writeEndElement(); // + } + + } } } + + if (fundingReferenceWritten) { + xmlw.writeEndElement(); // + } + } } } \ No newline at end of file From 8a12444d3b835a1df989bc674337897b7feaf1d2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 11:49:03 -0400 Subject: [PATCH 025/105] bug: add dataset descriptions --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 3b6a5cb2906..564768991cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1069,7 +1069,7 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws X if (dvObject instanceof Dataset d) { dv = d.getLatestVersionForCopy(); - dv.getDescriptions(); + descriptions = dv.getDescriptions(); } else if (dvObject instanceof DataFile df) { String description = df.getDescription(); if (description != null) { From f3e5dc1d00e1d68a734b4c593ae5b874bb5d14a2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 11:54:50 -0400 Subject: [PATCH 026/105] typo, add xml escape for funder --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 564768991cb..a4fd4585028 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1294,7 +1294,7 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr if (!StringUtils.isBlank(contributorName)) { fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); xmlw.writeStartElement("fundingReference"); // - XmlWriterUtil.writeFullElement(xmlw, "funderName", contributorName); + XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(contributorName)); xmlw.writeEndElement(); // } } @@ -1318,9 +1318,9 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr if (!StringUtils.isBlank(funder)) { fundingReferenceWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "fundingReferences", fundingReferenceWritten); xmlw.writeStartElement("fundingReference"); // - XmlWriterUtil.writeFullElement(xmlw, "funderName", funder); + XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(funder)); if (StringUtils.isNotBlank(awardNumber)) { - writeFullElement(xmlw, null, "awardNumber", null, awardNumber); + writeFullElement(xmlw, null, "awardNumber", StringEscapeUtils.escapeXml10(awardNumber)); } xmlw.writeEndElement(); // } From 5610c950212f2d3d80d7144c37c98e6cd0b71c5e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 12:00:09 -0400 Subject: [PATCH 027/105] still typo --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a4fd4585028..e2883cad1f9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -1320,7 +1320,7 @@ private void writeFundingReferences(XMLStreamWriter xmlw, DvObject dvObject) thr xmlw.writeStartElement("fundingReference"); // XmlWriterUtil.writeFullElement(xmlw, "funderName", StringEscapeUtils.escapeXml10(funder)); if (StringUtils.isNotBlank(awardNumber)) { - writeFullElement(xmlw, null, "awardNumber", StringEscapeUtils.escapeXml10(awardNumber)); + XmlWriterUtil.writeFullElement(xmlw, "awardNumber", StringEscapeUtils.escapeXml10(awardNumber)); } xmlw.writeEndElement(); // } From 7148b03360b00363f6550aed5d5d851ab7c2c356 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:14:48 -0400 Subject: [PATCH 028/105] mark contact as deprecated - unused --- .../java/edu/harvard/iq/dataverse/DatasetFieldConstant.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index 22bad42df96..c3e385dcff2 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -156,6 +156,8 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String confidentialityDeclaration="confidentialityDeclaration"; public final static String specialPermissions="specialPermissions"; public final static String restrictions="restrictions"; + @Deprecated + //Doesn't appear to be used and is not datasetContact public final static String contact="contact"; public final static String citationRequirements="citationRequirements"; public final static String depositorRequirements="depositorRequirements"; From 0470459316b3338bd940ea2f9afcf9ec1430eab1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:14:59 -0400 Subject: [PATCH 029/105] more fixes --- .../pidproviders/doi/XmlMetadataTemplate.java | 28 +++++++++++++------ 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index e2883cad1f9..fd5a4ecf7fb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -427,7 +427,7 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X case DatasetFieldConstant.distributor: compoundDistributors = dsf.getDatasetFieldCompoundValues(); break; - case DatasetFieldConstant.contact: + case DatasetFieldConstant.datasetContact: compoundContacts = dsf.getDatasetFieldCompoundValues(); break; case DatasetFieldConstant.contributor: @@ -638,7 +638,7 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } if (releaseDate != null) { - String date = Util.getDateTimeFormat().format(releaseDate); + String date = Util.getDateFormat().format(releaseDate); datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Available"); @@ -660,6 +660,14 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } } if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + if(StringUtils.isNotBlank(startDate)) { + Date start = Util.getDateTimeFormat().parse(startDate); + startDate = Util.getDateFormat().format(start); + } + if(StringUtils.isNotBlank(endDate)) { + Date end = Util.getDateTimeFormat().parse(endDate); + endDate = Util.getDateFormat().format(end); + } datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); @@ -675,14 +683,14 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea private void writeLanguage(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { // Currently not supported. Spec indicates one 'primary' language. Could send // the first entry in DatasetFieldConstant.language or send iff there is only - // one entry, and/or default to the machine's default lang? + // one entry, and/or default to the machine's default lang, or the dataverse metadatalang? return; } // 10, ResourceType (with mandatory general type // description sub- property) (M) private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { - List kindOfDataValues = new ArrayList(); + List kindOfDataValues = new ArrayList(); Map attributes = new HashMap(); attributes.put("resourceTypeGeneral", "Dataset"); @@ -691,7 +699,7 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X for (DatasetField dsf : dv.getDatasetFields()) { switch (dsf.getDatasetFieldType().getName()) { case DatasetFieldConstant.kindOfData: - kindOfDataValues = dsf.getControlledVocabularyValues(); + kindOfDataValues.addAll(dsf.getValues()); break; } } @@ -701,8 +709,7 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); xmlw.writeEndElement(); } else { - for (ControlledVocabularyValue kindOfDataValue : kindOfDataValues) { - String resourceType = kindOfDataValue.getStrValue(); + for (String resourceType : kindOfDataValues) { if (StringUtils.isNotBlank(resourceType)) { XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); } @@ -821,14 +828,16 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th * way those two fields are used for all identifier types. The code here is * ~best effort to interpret those fields. */ + logger.info("Found relpub: " + pubIdType + " " + identifier + " " + url); pubIdType = getCanonicalPublicationType(pubIdType); - +logger.info("Canonical type: " + pubIdType); // Prefer url if set, otherwise check identifier String relatedIdentifier = url; if (StringUtils.isBlank(relatedIdentifier)) { relatedIdentifier = identifier; } + logger.info("Related identifier: " + relatedIdentifier); // For types where we understand the protocol, get the canonical form if (pubIdType != null) { switch (pubIdType) { @@ -836,12 +845,15 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { relatedIdentifier = "doi:" + relatedIdentifier; } + logger.info("Intermediate Related identifier: " + relatedIdentifier); try { GlobalId pid = PidUtil.parseAsGlobalID(relatedIdentifier); relatedIdentifier = pid.asRawIdentifier(); } catch (IllegalArgumentException e) { + logger.warning("Invalid DOI: " + e.getLocalizedMessage()); relatedIdentifier = null; } + logger.info("Final Related identifier: " + relatedIdentifier); break; case "Handle": if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { From c0265da5324c6f68e9356a44261ce6b166ded6b8 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:22:29 -0400 Subject: [PATCH 030/105] catch parseexception --- .../pidproviders/doi/XmlMetadataTemplate.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index fd5a4ecf7fb..9ed417e77ce 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -5,6 +5,7 @@ import java.io.OutputStream; import java.net.MalformedURLException; import java.net.URL; +import java.text.ParseException; import java.util.ArrayList; import java.util.Date; import java.util.HashMap; @@ -661,12 +662,20 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { if(StringUtils.isNotBlank(startDate)) { + try { Date start = Util.getDateTimeFormat().parse(startDate); startDate = Util.getDateFormat().format(start); + } catch (ParseException e) { + logger.warning("Could not parse date: " + startDate); + } } if(StringUtils.isNotBlank(endDate)) { + try { Date end = Util.getDateTimeFormat().parse(endDate); - endDate = Util.getDateFormat().format(end); + endDate = Util.getDateFormat().format(end); + } catch (ParseException e) { + logger.warning("Could not parse date: " + endDate); + }; } datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); From 2ff867850500aa9b2eb5712348b65cf48ed4b917 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:52:42 -0400 Subject: [PATCH 031/105] fix alternateIdentifier, related PID parsing, series --- .../pidproviders/doi/XmlMetadataTemplate.java | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 9ed417e77ce..7f861b3e42d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -755,7 +755,7 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) } if (altPids != null && !altPids.isEmpty()) { - alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternateIdentifiers", alternatesWritten); for (AlternativePersistentIdentifier altPid : altPids) { String identifierType = null; String identifier = null; @@ -775,7 +775,7 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) identifier = altPid.getAuthority() + altPid.getIdentifier(); break; } - attributes.put("alternativeIdentifierType", identifierType); + attributes.put("alternateIdentifierType", identifierType); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); } @@ -795,9 +795,9 @@ private void writeAlternateIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) break; } } - attributes.put("alternativeIdentifierType", identifierType); + attributes.put("alternateIdentifierType", identifierType); if (!StringUtils.isBlank(identifier)) { - alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternativeIdentifiers", alternatesWritten); + alternatesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "alternateIdentifiers", alternatesWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "alternateIdentifier", attributes, identifier); } @@ -851,7 +851,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th if (pubIdType != null) { switch (pubIdType) { case "DOI": - if (!relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http")) { + if (!relatedIdentifier.startsWith("doi:") || !relatedIdentifier.startsWith("http")) { relatedIdentifier = "doi:" + relatedIdentifier; } logger.info("Intermediate Related identifier: " + relatedIdentifier); @@ -865,7 +865,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th logger.info("Final Related identifier: " + relatedIdentifier); break; case "Handle": - if (!relatedIdentifier.startsWith("hdl:") || relatedIdentifier.startsWith("http")) { + if (!relatedIdentifier.startsWith("hdl:") || !relatedIdentifier.startsWith("http")) { relatedIdentifier = "hdl:" + relatedIdentifier; } try { @@ -1158,7 +1158,7 @@ private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws X List childDsfs = dsfcv.getChildDatasetFields(); for (DatasetField childDsf : childDsfs) { - if (DatasetFieldConstant.seriesInformation.equals(childDsf.getDatasetFieldType().getName())) { + if (DatasetFieldConstant.seriesName.equals(childDsf.getDatasetFieldType().getName())) { String seriesInformation = childDsf.getValue(); if (StringUtils.isNotBlank(seriesInformation)) { descriptionsWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "descriptions", descriptionsWritten); From 182f3d7bca310c54eb44f0452c76671b26b03824 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 14:53:47 -0400 Subject: [PATCH 032/105] catch PID update exception to avoid corrupt dataset --- .../CuratePublishedDatasetVersionCommand.java | 45 ++++++++++++------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java index fbff40a9c80..dd8b19e0c3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CuratePublishedDatasetVersionCommand.java @@ -1,6 +1,7 @@ package edu.harvard.iq.dataverse.engine.command.impl; import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.batch.util.LoggingUtil; import edu.harvard.iq.dataverse.datavariable.VarGroup; import edu.harvard.iq.dataverse.engine.command.CommandContext; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; @@ -151,7 +152,11 @@ public Dataset execute(CommandContext ctxt) throws CommandException { tempDataset.setThumbnailFile(publishedFmd.getDataFile()); } } - + if(logger.isLoggable(Level.FINE)) { + for(FileMetadata fmd: updateVersion.getFileMetadatas()) { + logger.fine("Id: " + fmd.getId() + " label: " + fmd.getLabel()); + } + } // Update modification time on the published version and the dataset updateVersion.setLastUpdateTime(getTimestamp()); tempDataset.setModificationTime(getTimestamp()); @@ -170,28 +175,38 @@ public Dataset execute(CommandContext ctxt) throws CommandException { cmd.execute(ctxt); // And update metadata at PID provider - ctxt.engine().submit( - new UpdateDvObjectPIDMetadataCommand(savedDataset, getRequest())); - - //And the exported metadata files try { - ExportService instance = ExportService.getInstance(); - instance.exportAllFormats(getDataset()); - } catch (ExportException ex) { - // Just like with indexing, a failure to export is not a fatal condition. - logger.log(Level.WARNING, "Curate Published DatasetVersion: exception while exporting metadata files:{0}", ex.getMessage()); + ctxt.engine().submit( + new UpdateDvObjectPIDMetadataCommand(savedDataset, getRequest())); + } catch (CommandException ex) { + //Make this non-fatal as after the DeleteDatasetVersionCommand, we can't roll back - for some reason no datasetfields remain in the DB + //(The old version doesn't need them and the new version doesn't get updated to include them?) + logger.log(Level.WARNING, "Curate Published DatasetVersion: exception while updating PID metadata:{0}", ex.getMessage()); } - - // Update so that getDataset() in updateDatasetUser will get the up-to-date copy // (with no draft version) setDataset(savedDataset); updateDatasetUser(ctxt); - - - return savedDataset; } + @Override + public boolean onSuccess(CommandContext ctxt, Object r) { + boolean retVal = true; + Dataset d = (Dataset) r; + + ctxt.index().asyncIndexDataset(d, true); + + // And the exported metadata files + try { + ExportService instance = ExportService.getInstance(); + instance.exportAllFormats(d); + } catch (ExportException ex) { + // Just like with indexing, a failure to export is not a fatal condition. + retVal = false; + logger.log(Level.WARNING, "Curate Published DatasetVersion: exception while exporting metadata files:{0}", ex.getMessage()); + } + return retVal; + } } From be903555bcc3c21169b3a7343a076c331d308bf6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 16:24:40 -0400 Subject: [PATCH 033/105] try long sleep --- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index dab0ff43fcf..29de42f3578 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -944,9 +944,9 @@ public void callFinalizePublishCommandAsynchronously(Long datasetId, CommandCont // portion of the PublishDatasetCommand. I'm going to leave the 1 second // sleep below, for just in case reasons: -- L.A. try { - Thread.sleep(1000); + Thread.sleep(5000); } catch (Exception ex) { - logger.warning("Failed to sleep for a second."); + logger.warning("Failed to sleep for five seconds."); } logger.fine("Running FinalizeDatasetPublicationCommand, asynchronously"); Dataset theDataset = find(datasetId); From e458e8ca5c14f48d8da94d83c2cb40e76e9198d6 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 16:53:28 -0400 Subject: [PATCH 034/105] set dv released before pid publicize, go back to short time --- .../java/edu/harvard/iq/dataverse/DatasetServiceBean.java | 4 ++-- .../command/impl/FinalizeDatasetPublicationCommand.java | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java index 29de42f3578..18bd6dc74ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetServiceBean.java @@ -944,9 +944,9 @@ public void callFinalizePublishCommandAsynchronously(Long datasetId, CommandCont // portion of the PublishDatasetCommand. I'm going to leave the 1 second // sleep below, for just in case reasons: -- L.A. try { - Thread.sleep(5000); + Thread.sleep(1000); } catch (Exception ex) { - logger.warning("Failed to sleep for five seconds."); + logger.warning("Failed to sleep for one second."); } logger.fine("Running FinalizeDatasetPublicationCommand, asynchronously"); Dataset theDataset = find(datasetId); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java index 287e877f6e0..299bb3168de 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/FinalizeDatasetPublicationCommand.java @@ -211,7 +211,7 @@ public Dataset execute(CommandContext ctxt) throws CommandException { if (theDataset.getLatestVersion().getVersionState() != RELEASED) { // some imported datasets may already be released. - + theDataset.getLatestVersion().setVersionState(RELEASED); if (!datasetExternallyReleased) { publicizeExternalIdentifier(theDataset, ctxt); // Will throw a CommandException, unless successful. @@ -220,7 +220,6 @@ public Dataset execute(CommandContext ctxt) throws CommandException { // a failure - it will remove any locks, and it will send a // proper notification to the user(s). } - theDataset.getLatestVersion().setVersionState(RELEASED); } final Dataset ds = ctxt.em().merge(theDataset); From 27fe7b4d0dad6124d5e036c8ec5d36b31a371a9b Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 24 May 2024 17:03:13 -0400 Subject: [PATCH 035/105] always use latest version for copy --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 7f861b3e42d..0adc9984b3d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -146,7 +146,7 @@ private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String languag List altTitles = new ArrayList<>(); // Only Datasets can have a subtitle or alternative titles if (dvObject instanceof Dataset d) { - DatasetVersion dv = d.getLatestVersion(); + DatasetVersion dv = d.getLatestVersionForCopy(); Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); if (subTitleField.isPresent()) { subTitle = subTitleField.get().getValue(); From 00a383007686b49b938f88415b476e377d054f98 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 09:53:40 -0400 Subject: [PATCH 036/105] handle deaccession, fix relatedIDtype for files --- .../edu/harvard/iq/dataverse/DataFile.java | 19 +++ .../pidproviders/doi/XmlMetadataTemplate.java | 117 +++++++++++------- 2 files changed, 90 insertions(+), 46 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataFile.java b/src/main/java/edu/harvard/iq/dataverse/DataFile.java index 29a4a14c021..1a610d9ea6e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataFile.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataFile.java @@ -1123,4 +1123,23 @@ private boolean tagExists(String tagLabel) { } return false; } + + public boolean isDeaccessioned() { + // return true, if all published versions were deaccessioned + boolean inDeaccessionedVersions = false; + for (FileMetadata fmd : getFileMetadatas()) { + DatasetVersion testDsv = fmd.getDatasetVersion(); + if (testDsv.isReleased()) { + return false; + } + // Also check for draft version + if (testDsv.isDraft()) { + return false; + } + if (testDsv.isDeaccessioned()) { + inDeaccessionedVersions = true; + } + } + return inDeaccessionedVersions; // since any published version would have already returned + } } // end of class diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 0adc9984b3d..96ee84fe13b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -100,30 +100,41 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM String metadataLanguage = null; // when set, otherwise = language? XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); xmlw.writeStartElement("resource"); - + boolean deaccessioned=false; + if(dvObject instanceof Dataset d) { + deaccessioned=d.isDeaccessioned(); + } else if (dvObject instanceof DataFile df) { + deaccessioned = df.isDeaccessioned(); + } xmlw.writeDefaultNamespace(XML_NAMESPACE); xmlw.writeAttribute("xmlns:xsi", XML_XSI); xmlw.writeAttribute("xsi:schemaLocation", XML_SCHEMA_LOCATION); writeIdentifier(xmlw, dvObject); - writeCreators(xmlw, doiMetadata.getAuthors()); - writeTitles(xmlw, dvObject, language); - writePublisher(xmlw, dvObject); - writePublicationYear(xmlw, dvObject); - writeSubjects(xmlw, dvObject); - writeContributors(xmlw, dvObject); - writeDates(xmlw, dvObject); - writeLanguage(xmlw, dvObject); + writeCreators(xmlw, doiMetadata.getAuthors(), deaccessioned); + writeTitles(xmlw, dvObject, language, deaccessioned); + writePublisher(xmlw, dvObject, deaccessioned); + writePublicationYear(xmlw, dvObject, deaccessioned); + if (!deaccessioned) { + writeSubjects(xmlw, dvObject); + writeContributors(xmlw, dvObject); + writeDates(xmlw, dvObject); + writeLanguage(xmlw, dvObject); + } writeResourceType(xmlw, dvObject); - writeAlternateIdentifiers(xmlw, dvObject); - writeRelatedIdentifiers(xmlw, dvObject); - writeSize(xmlw, dvObject); - writeFormats(xmlw, dvObject); - writeVersion(xmlw, dvObject); - writeAccessRights(xmlw, dvObject); - writeDescriptions(xmlw, dvObject); - writeGeoLocations(xmlw, dvObject); - writeFundingReferences(xmlw, dvObject); + if (!deaccessioned) { + writeAlternateIdentifiers(xmlw, dvObject); + writeRelatedIdentifiers(xmlw, dvObject); + writeSize(xmlw, dvObject); + writeFormats(xmlw, dvObject); + writeVersion(xmlw, dvObject); + writeAccessRights(xmlw, dvObject); + } + writeDescriptions(xmlw, dvObject, deaccessioned); + if (!deaccessioned) { + writeGeoLocations(xmlw, dvObject); + writeFundingReferences(xmlw, dvObject); + } xmlw.writeEndElement(); xmlw.flush(); } @@ -140,23 +151,29 @@ private void generateXML(DvObject dvObject, OutputStream outputStream) throws XM * @return * @throws XMLStreamException */ - private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language) throws XMLStreamException { - String title = doiMetadata.getTitle(); + private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String language, boolean deaccessioned) throws XMLStreamException { + String title = null; String subTitle = null; List altTitles = new ArrayList<>(); - // Only Datasets can have a subtitle or alternative titles - if (dvObject instanceof Dataset d) { - DatasetVersion dv = d.getLatestVersionForCopy(); - Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); - if (subTitleField.isPresent()) { - subTitle = subTitleField.get().getValue(); - } - Optional altTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.alternativeTitle)).findFirst(); - if (altTitleField.isPresent()) { - altTitles = altTitleField.get().getValues(); + + if (!deaccessioned) { + doiMetadata.getTitle(); + + // Only Datasets can have a subtitle or alternative titles + if (dvObject instanceof Dataset d) { + DatasetVersion dv = d.getLatestVersionForCopy(); + Optional subTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.subTitle)).findFirst(); + if (subTitleField.isPresent()) { + subTitle = subTitleField.get().getValue(); + } + Optional altTitleField = dv.getDatasetFields().stream().filter(f -> f.getDatasetFieldType().getName().equals(DatasetFieldConstant.alternativeTitle)).findFirst(); + if (altTitleField.isPresent()) { + altTitles = altTitleField.get().getValues(); + } } + } else { + title = AbstractDOIProvider.UNAVAILABLE; } - if (StringUtils.isNotBlank(title) || StringUtils.isNotBlank(subTitle) || (altTitles != null && !String.join("", altTitles).isBlank())) { xmlw.writeStartElement("titles"); if (StringUtils.isNotBlank(title)) { @@ -227,13 +244,13 @@ private void writeIdentifier(XMLStreamWriter xmlw, DvObject dvObject) throws XML * - the list of authors * @throws XMLStreamException */ - public void writeCreators(XMLStreamWriter xmlw, List authorList) throws XMLStreamException { + public void writeCreators(XMLStreamWriter xmlw, List authorList, boolean deaccessioned) throws XMLStreamException { // creators -> creator -> creatorName with nameType attribute, givenName, // familyName, nameIdentifier // write all creators xmlw.writeStartElement("creators"); // - if (authorList != null && !authorList.isEmpty()) { + if (!deaccessioned && authorList != null && !authorList.isEmpty()) { for (DatasetAuthor author : authorList) { String creatorName = StringEscapeUtils.escapeXml10(author.getName().getDisplayValue()); String affiliation = null; @@ -267,18 +284,21 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList) xmlw.writeEndElement(); // } - private void writePublisher(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + private void writePublisher(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { // publisher should already be non null - :unav if it wasn't available + if(deaccessioned) { + doiMetadata.setPublisher(AbstractPidProvider.UNAVAILABLE); + } XmlWriterUtil.writeFullElement(xmlw, "publisher", doiMetadata.getPublisher()); } - private void writePublicationYear(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + private void writePublicationYear(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { // Can't use "UNKNOWN" here because DataCite will respond with "[facet // 'pattern'] the value 'unknown' is not accepted by the pattern '[\d]{4}'" String pubYear = "9999"; // FIXME: Investigate why this.publisherYear is sometimes null now that pull // request #4606 has been merged. - if (doiMetadata.getPublisherYear() != null) { + if (! deaccessioned && (doiMetadata.getPublisherYear() != null)) { // Added to prevent a NullPointerException when trying to destroy datasets when // using DataCite rather than EZID. pubYear = doiMetadata.getPublisherYear(); @@ -926,6 +946,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th attributes.clear(); attributes.put("relationType", "IsPartOf"); + attributes.put("relatedIdentifierType", pubIdType); relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, pid.asRawIdentifier()); } @@ -1082,20 +1103,24 @@ private void writeAccessRights(XMLStreamWriter xmlw, DvObject dvObject) throws X xmlw.writeEndElement(); // } - private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStreamException { + private void writeDescriptions(XMLStreamWriter xmlw, DvObject dvObject, boolean deaccessioned) throws XMLStreamException { // descriptions -> description with descriptionType attribute boolean descriptionsWritten = false; List descriptions = null; DatasetVersion dv = null; - - if (dvObject instanceof Dataset d) { - dv = d.getLatestVersionForCopy(); - descriptions = dv.getDescriptions(); - } else if (dvObject instanceof DataFile df) { - String description = df.getDescription(); - if (description != null) { - descriptions = new ArrayList(); - descriptions.add(description); + if(deaccessioned) { + descriptions = new ArrayList(); + descriptions.add(AbstractDOIProvider.UNAVAILABLE); + } else { + if (dvObject instanceof Dataset d) { + dv = d.getLatestVersionForCopy(); + descriptions = dv.getDescriptions(); + } else if (dvObject instanceof DataFile df) { + String description = df.getDescription(); + if (description != null) { + descriptions = new ArrayList(); + descriptions.add(description); + } } } Map attributes = new HashMap(); From 1faf0cd84c5ef52a8e88afb7989e238058870916 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 10:26:30 -0400 Subject: [PATCH 037/105] missed assignment for title --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 96ee84fe13b..b2008e14a89 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -157,7 +157,7 @@ private void writeTitles(XMLStreamWriter xmlw, DvObject dvObject, String languag List altTitles = new ArrayList<>(); if (!deaccessioned) { - doiMetadata.getTitle(); + title = doiMetadata.getTitle(); // Only Datasets can have a subtitle or alternative titles if (dvObject instanceof Dataset d) { From 23dd581c98b921908b2cdcca13d32e8731c76e7e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 10:54:06 -0400 Subject: [PATCH 038/105] fix creator for deaccessioned --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index b2008e14a89..8f962204302 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -249,8 +249,10 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, // familyName, nameIdentifier // write all creators xmlw.writeStartElement("creators"); // - - if (!deaccessioned && authorList != null && !authorList.isEmpty()) { + if(deaccessioned) { + authorList = null; + } + if (authorList != null && !authorList.isEmpty()) { for (DatasetAuthor author : authorList) { String creatorName = StringEscapeUtils.escapeXml10(author.getName().getDisplayValue()); String affiliation = null; From 3bbd2e9dfc4cd3d1e5cebd5a9cf7dbbfe52c1fa4 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 11:51:50 -0400 Subject: [PATCH 039/105] correct fix for creators when deaccessioned --- .../pidproviders/doi/XmlMetadataTemplate.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 8f962204302..a3eca9ef9a0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -250,8 +250,10 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, // write all creators xmlw.writeStartElement("creators"); // if(deaccessioned) { + //skip the loop below authorList = null; } + boolean nothingWritten = true; if (authorList != null && !authorList.isEmpty()) { for (DatasetAuthor author : authorList) { String creatorName = StringEscapeUtils.escapeXml10(author.getName().getDisplayValue()); @@ -274,15 +276,17 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, if (StringUtils.isNotBlank(creatorName)) { JsonObject creatorObj = PersonOrOrgUtil.getPersonOrOrganization(creatorName, false, StringUtils.containsIgnoreCase(nameIdentifierScheme, "orcid")); + nothingWritten = false; writeEntityElements(xmlw, "creator", null, creatorObj, affiliation, nameIdentifier, nameIdentifierScheme); } - else { - // Authors unavailable - XmlWriterUtil.writeFullElement(xmlw, "creator", "creatorName", AbstractPidProvider.UNAVAILABLE); - } + } } + if (nothingWritten) { + // Authors unavailable + XmlWriterUtil.writeFullElement(xmlw, "creator", "creatorName", AbstractPidProvider.UNAVAILABLE); + } xmlw.writeEndElement(); // } From 4def6da32c207223fb2cf9d5aada50f921ddd474 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 12:00:15 -0400 Subject: [PATCH 040/105] remove bad value and lang --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a3eca9ef9a0..2d09c67fea9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -285,7 +285,7 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, } if (nothingWritten) { // Authors unavailable - XmlWriterUtil.writeFullElement(xmlw, "creator", "creatorName", AbstractPidProvider.UNAVAILABLE); + XmlWriterUtil.writeFullElement(xmlw, "creator", AbstractPidProvider.UNAVAILABLE); } xmlw.writeEndElement(); // } From eac477ec3c2fef4f48759b110f6157081b301eff Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 12:46:07 -0400 Subject: [PATCH 041/105] add creatorName sub element for deaccession/no names case --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 2d09c67fea9..a660a80448a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -285,7 +285,9 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, } if (nothingWritten) { // Authors unavailable - XmlWriterUtil.writeFullElement(xmlw, "creator", AbstractPidProvider.UNAVAILABLE); + xmlw.writeStartElement("creator"); + XmlWriterUtil.writeFullElement(xmlw, "creatorName", AbstractPidProvider.UNAVAILABLE); + xmlw.writeEndElement("creator"); } xmlw.writeEndElement(); // } From 154ac8a91554be29492d62454f1b0e52501b5af2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 12:49:59 -0400 Subject: [PATCH 042/105] typo --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index a660a80448a..732a633116e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -287,7 +287,7 @@ public void writeCreators(XMLStreamWriter xmlw, List authorList, // Authors unavailable xmlw.writeStartElement("creator"); XmlWriterUtil.writeFullElement(xmlw, "creatorName", AbstractPidProvider.UNAVAILABLE); - xmlw.writeEndElement("creator"); + xmlw.writeEndElement(); } xmlw.writeEndElement(); // } From 9144f6c96ae0685a7ac719d2203f0aed3f71e85e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 13:16:32 -0400 Subject: [PATCH 043/105] fix resourceType - always 1 entry --- .../pidproviders/doi/XmlMetadataTemplate.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 732a633116e..8f6211c0730 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -736,23 +736,24 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X for (DatasetField dsf : dv.getDatasetFields()) { switch (dsf.getDatasetFieldType().getName()) { case DatasetFieldConstant.kindOfData: - kindOfDataValues.addAll(dsf.getValues()); + List vals = dsf.getValues(); + for(String val: vals) { + if(StringUtils.isNotBlank(val)) { + kindOfDataValues.add(val); + } + } break; } } - if (kindOfDataValues.isEmpty()) { + if (!kindOfDataValues.isEmpty()) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, String.join(", ", kindOfDataValues)); + + } else { // Write an attribute only element if there are no kindOfData values. xmlw.writeStartElement("resourceType"); xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); xmlw.writeEndElement(); - } else { - for (String resourceType : kindOfDataValues) { - if (StringUtils.isNotBlank(resourceType)) { - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, resourceType); - } - } } - } } From a5870fbaf3a89e9c6100b8e1f4371caa291c1e23 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 28 May 2024 13:26:41 -0400 Subject: [PATCH 044/105] Also handle file case for resourceType --- .../pidproviders/doi/XmlMetadataTemplate.java | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 8f6211c0730..7d817d57a2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -745,16 +745,17 @@ private void writeResourceType(XMLStreamWriter xmlw, DvObject dvObject) throws X break; } } - if (!kindOfDataValues.isEmpty()) { - XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, String.join(", ", kindOfDataValues)); - - } else { - // Write an attribute only element if there are no kindOfData values. - xmlw.writeStartElement("resourceType"); - xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); - xmlw.writeEndElement(); - } } + if (!kindOfDataValues.isEmpty()) { + XmlWriterUtil.writeFullElementWithAttributes(xmlw, "resourceType", attributes, String.join(", ", kindOfDataValues)); + + } else { + // Write an attribute only element if there are no kindOfData values. + xmlw.writeStartElement("resourceType"); + xmlw.writeAttribute("resourceTypeGeneral", attributes.get("resourceTypeGeneral")); + xmlw.writeEndElement(); + } + } /** From 24db2af2bfe9564eedc1ee9aedae8b9048bd551e Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 31 May 2024 14:47:06 -0400 Subject: [PATCH 045/105] missed changes --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 7d817d57a2a..f5bd009e8d7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -23,6 +23,7 @@ import javax.xml.stream.XMLStreamWriter; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.text.StringEscapeUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; @@ -39,6 +40,7 @@ import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetFieldType; import edu.harvard.iq.dataverse.DatasetFieldValue; +import edu.harvard.iq.dataverse.DatasetRelPublication; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; import edu.harvard.iq.dataverse.GlobalId; @@ -703,7 +705,7 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea endDate = Util.getDateFormat().format(end); } catch (ParseException e) { logger.warning("Could not parse date: " + endDate); - }; + }; } datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); @@ -853,7 +855,6 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th Map attributes = new HashMap(); if (dvObject instanceof Dataset dataset) { - List relatedPublications = dataset.getLatestVersionForCopy().getRelatedPublications(); if (!relatedPublications.isEmpty()) { for (DatasetRelPublication relatedPub : relatedPublications) { @@ -967,7 +968,6 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th } } - static HashMap relatedIdentifierTypeMap = new HashMap(); private static String getCanonicalPublicationType(String pubIdType) { From f0fd61ad555369ef1af9a1529797cf8d73d6efde Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 31 May 2024 15:05:02 -0400 Subject: [PATCH 046/105] simplify - util checks for null and empty --- .../dataverse/export/ddi/DdiExportUtil.java | 46 +++++-------------- 1 file changed, 12 insertions(+), 34 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java index c0e3057696a..f5efc448090 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/ddi/DdiExportUtil.java @@ -371,7 +371,7 @@ private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datase private static void writeVersionStatement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException{ xmlw.writeStartElement("verStmt"); - XmlWriterUtil.writeAttribute(xmlw,"source","archive"); + xmlw.writeAttribute("source","archive"); xmlw.writeStartElement("version"); XmlWriterUtil.writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); XmlWriterUtil.writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); @@ -842,9 +842,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!authorName.isEmpty()){ xmlw.writeStartElement("AuthEnty"); - if(!authorAffiliation.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); - } + XmlWriterUtil.writeAttribute(xmlw,"affiliation",authorAffiliation); xmlw.writeCharacters(authorName); xmlw.writeEndElement(); //AuthEnty } @@ -865,9 +863,7 @@ private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!contributorName.isEmpty()){ xmlw.writeStartElement("othId"); - if(!contributorType.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"role", contributorType); - } + XmlWriterUtil.writeAttribute(xmlw,"role", contributorType); xmlw.writeCharacters(contributorName); xmlw.writeEndElement(); //othId } @@ -907,12 +903,8 @@ private static void writeContactsElement(XMLStreamWriter xmlw, DatasetVersionDTO // TODO: Since datasetContactEmail is a required field but datasetContactName is not consider not checking if datasetContactName is empty so we can write out datasetContactEmail. if (!datasetContactName.isEmpty()){ xmlw.writeStartElement("contact"); - if(!datasetContactAffiliation.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); - } - if(!datasetContactEmail.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); - } + XmlWriterUtil.writeAttribute(xmlw,"affiliation",datasetContactAffiliation); + XmlWriterUtil.writeAttribute(xmlw,"email",datasetContactEmail); xmlw.writeCharacters(datasetContactName); xmlw.writeEndElement(); //AuthEnty } @@ -957,9 +949,7 @@ private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDT xmlw.writeStartElement("producer"); XmlWriterUtil.writeAttribute(xmlw, "affiliation", producerAffiliation); XmlWriterUtil.writeAttribute(xmlw, "abbr", producerAbbreviation); - /*if (!producerLogo.isEmpty()) { - XmlWriterUtil.writeAttribute(xmlw, "role", producerLogo); - }*/ + //XmlWriterUtil.writeAttribute(xmlw, "role", producerLogo); xmlw.writeCharacters(producerName); xmlw.writeEndElement(); //AuthEnty } @@ -1139,9 +1129,7 @@ private static void writeAbstractElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!descriptionText.isEmpty()){ xmlw.writeStartElement("abstract"); - if(!descriptionDate.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); - } + XmlWriterUtil.writeAttribute(xmlw,"date",descriptionDate); if(DvObjectContainer.isMetadataLanguageSet(lang)) { xmlw.writeAttribute("xml:lang", lang); } @@ -1176,9 +1164,7 @@ private static void writeGrantElement(XMLStreamWriter xmlw, DatasetVersionDTO da } if (!grantNumber.isEmpty()){ xmlw.writeStartElement("grantNo"); - if(!grantAgency.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); - } + XmlWriterUtil.writeAttribute(xmlw,"agency",grantAgency); xmlw.writeCharacters(grantNumber); xmlw.writeEndElement(); //grantno } @@ -1210,9 +1196,7 @@ private static void writeOtherIdElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!otherId.isEmpty()){ xmlw.writeStartElement("IDNo"); - if(!otherIdAgency.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); - } + XmlWriterUtil.writeAttribute(xmlw,"agency",otherIdAgency); xmlw.writeCharacters(otherId); xmlw.writeEndElement(); //IDNo } @@ -1244,9 +1228,7 @@ private static void writeSoftwareElement(XMLStreamWriter xmlw, DatasetVersionDTO } if (!softwareName.isEmpty()){ xmlw.writeStartElement("software"); - if(!softwareVersion.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); - } + XmlWriterUtil.writeAttribute(xmlw,"version",softwareVersion); xmlw.writeCharacters(softwareName); xmlw.writeEndElement(); //software } @@ -1359,12 +1341,8 @@ private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO da } if (!notesText.isEmpty()) { xmlw.writeStartElement("notes"); - if(!notesType.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"type",notesType); - } - if(!notesSubject.isEmpty()){ - XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); - } + XmlWriterUtil.writeAttribute(xmlw,"type",notesType); + XmlWriterUtil.writeAttribute(xmlw,"subject",notesSubject); xmlw.writeCharacters(notesText); xmlw.writeEndElement(); } From ead153f502ef77258d20f6faf4a0fc8282a74687 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 10 Jun 2024 12:18:45 -0400 Subject: [PATCH 047/105] typo in DOI parsing logic --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index f5bd009e8d7..e6c1a1ae6b4 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -882,7 +882,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th if (pubIdType != null) { switch (pubIdType) { case "DOI": - if (!relatedIdentifier.startsWith("doi:") || !relatedIdentifier.startsWith("http")) { + if (!(relatedIdentifier.startsWith("doi:") || relatedIdentifier.startsWith("http"))) { relatedIdentifier = "doi:" + relatedIdentifier; } logger.info("Intermediate Related identifier: " + relatedIdentifier); From ea75216025ca358f1c9d7d3c11d324b6dbe4f0f2 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Mon, 10 Jun 2024 12:58:46 -0400 Subject: [PATCH 048/105] only files in latestversionforcopy --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index e6c1a1ae6b4..ae7c21b3308 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -43,6 +43,7 @@ import edu.harvard.iq.dataverse.DatasetRelPublication; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.DvObject; +import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.GlobalId; import edu.harvard.iq.dataverse.TermsOfUseAndAccess; import edu.harvard.iq.dataverse.api.Util; @@ -934,10 +935,12 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th } } } - if (!dataset.getFiles().isEmpty() && !(dataset.getFiles().get(0).getIdentifier() == null)) { + List fmds = dataset.getLatestVersionForCopy().getFileMetadatas(); + if (!(fmds==null) && fmds.isEmpty()) { attributes.clear(); attributes.put("relationType", "HasPart"); - for (DataFile dataFile : dataset.getFiles()) { + for (FileMetadata fmd : fmds) { + DataFile dataFile = fmd.getDataFile(); GlobalId pid = dataFile.getGlobalId(); if (pid != null) { String pubIdType = getCanonicalPublicationType(pid.getProtocol()); From b6bd530db70dfbc78017445e4e2af79233b6e899 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Tue, 11 Jun 2024 16:40:58 -0400 Subject: [PATCH 049/105] fix date parsing, clear bad values --- .../dataverse/pidproviders/doi/XmlMetadataTemplate.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index ae7c21b3308..9ba1e4e3116 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -692,22 +692,27 @@ private void writeDates(XMLStreamWriter xmlw, DvObject dvObject) throws XMLStrea } } if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { + //Minimal clean-up - useful? Parse/format would remove unused chars, and an exception would clear the date so we don't send nonsense if(StringUtils.isNotBlank(startDate)) { try { - Date start = Util.getDateTimeFormat().parse(startDate); + Date start = Util.getDateFormat().parse(startDate); startDate = Util.getDateFormat().format(start); } catch (ParseException e) { logger.warning("Could not parse date: " + startDate); + startDate = null; } } if(StringUtils.isNotBlank(endDate)) { try { - Date end = Util.getDateTimeFormat().parse(endDate); + Date end = Util.getDateFormat().parse(endDate); endDate = Util.getDateFormat().format(end); } catch (ParseException e) { logger.warning("Could not parse date: " + endDate); + endDate = null; }; } + } + if (StringUtils.isNotBlank(startDate) || StringUtils.isNotBlank(endDate)) { datesWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "dates", datesWritten); attributes.put("dateType", "Collected"); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "date", attributes, (startDate + "/" + endDate).trim()); From e1383d77d3304418ebf3decfd257ca0610994ce1 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 14 Jun 2024 13:10:04 -0400 Subject: [PATCH 050/105] relationType entry in citation block --- conf/solr/9.3.0/schema.xml | 2 + ...dataset-create-new-all-default-fields.json | 2935 ++++++++--------- scripts/api/data/metadatablocks/citation.tsv | 105 +- .../iq/dataverse/DatasetFieldConstant.java | 1 + .../dublincore/DublinCoreExportUtil.java | 35 +- .../export/openaire/OpenAireExportUtil.java | 9 +- .../java/propertyFiles/citation.properties | 2 + .../export/OpenAireExportUtilTest.java | 2 +- 8 files changed, 1502 insertions(+), 1589 deletions(-) diff --git a/conf/solr/9.3.0/schema.xml b/conf/solr/9.3.0/schema.xml index 5dde750573d..32f10d0a621 100644 --- a/conf/solr/9.3.0/schema.xml +++ b/conf/solr/9.3.0/schema.xml @@ -349,6 +349,7 @@ + @@ -589,6 +590,7 @@ + diff --git a/scripts/api/data/dataset-create-new-all-default-fields.json b/scripts/api/data/dataset-create-new-all-default-fields.json index cc856c6372f..e522ab32b1d 100644 --- a/scripts/api/data/dataset-create-new-all-default-fields.json +++ b/scripts/api/data/dataset-create-new-all-default-fields.json @@ -1,1527 +1,1410 @@ { - "datasetVersion": { - "license": { - "name": "CC0 1.0", - "uri": "http://creativecommons.org/publicdomain/zero/1.0" - }, - "metadataBlocks": { - "citation": { - "displayName": "Citation Metadata", - "fields": [ - { - "typeName": "title", - "multiple": false, - "typeClass": "primitive", - "value": "Replication Data for: Title" - }, - { - "typeName": "subtitle", - "multiple": false, - "typeClass": "primitive", - "value": "Subtitle" - }, - { - "typeName": "alternativeTitle", - "multiple": true, - "typeClass": "primitive", - "value": ["Alternative Title"] - }, - { - "typeName": "alternativeURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://AlternativeURL.org" - }, - { - "typeName": "otherId", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "otherIdAgency": { - "typeName": "otherIdAgency", - "multiple": false, - "typeClass": "primitive", - "value": "OtherIDAgency1" - }, - "otherIdValue": { - "typeName": "otherIdValue", - "multiple": false, - "typeClass": "primitive", - "value": "OtherIDIdentifier1" - } - }, - { - "otherIdAgency": { - "typeName": "otherIdAgency", - "multiple": false, - "typeClass": "primitive", - "value": "OtherIDAgency2" - }, - "otherIdValue": { - "typeName": "otherIdValue", - "multiple": false, - "typeClass": "primitive", - "value": "OtherIDIdentifier2" - } - } - ] - }, - { - "typeName": "author", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "authorName": { - "typeName": "authorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastAuthor1, FirstAuthor1" - }, - "authorAffiliation": { - "typeName": "authorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "AuthorAffiliation1" - }, - "authorIdentifierScheme": { - "typeName": "authorIdentifierScheme", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "ORCID" - }, - "authorIdentifier": { - "typeName": "authorIdentifier", - "multiple": false, - "typeClass": "primitive", - "value": "AuthorIdentifier1" - } - }, - { - "authorName": { - "typeName": "authorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastAuthor2, FirstAuthor2" - }, - "authorAffiliation": { - "typeName": "authorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "AuthorAffiliation2" - }, - "authorIdentifierScheme": { - "typeName": "authorIdentifierScheme", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "ISNI" - }, - "authorIdentifier": { - "typeName": "authorIdentifier", - "multiple": false, - "typeClass": "primitive", - "value": "AuthorIdentifier2" - } - } - ] - }, - { - "typeName": "datasetContact", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "datasetContactName": { - "typeName": "datasetContactName", - "multiple": false, - "typeClass": "primitive", - "value": "LastContact1, FirstContact1" - }, - "datasetContactAffiliation": { - "typeName": "datasetContactAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "ContactAffiliation1" - }, - "datasetContactEmail": { - "typeName": "datasetContactEmail", - "multiple": false, - "typeClass": "primitive", - "value": "ContactEmail1@mailinator.com" - } - }, - { - "datasetContactName": { - "typeName": "datasetContactName", - "multiple": false, - "typeClass": "primitive", - "value": "LastContact2, FirstContact2" - }, - "datasetContactAffiliation": { - "typeName": "datasetContactAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "ContactAffiliation2" - }, - "datasetContactEmail": { - "typeName": "datasetContactEmail", - "multiple": false, - "typeClass": "primitive", - "value": "ContactEmail2@mailinator.com" - } - } - ] - }, - { - "typeName": "dsDescription", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "dsDescriptionValue": { - "typeName": "dsDescriptionValue", - "multiple": false, - "typeClass": "primitive", - "value": "DescriptionText1" - }, - "dsDescriptionDate": { - "typeName": "dsDescriptionDate", - "multiple": false, - "typeClass": "primitive", - "value": "1000-01-01" - } - }, - { - "dsDescriptionValue": { - "typeName": "dsDescriptionValue", - "multiple": false, - "typeClass": "primitive", - "value": "DescriptionText2" - }, - "dsDescriptionDate": { - "typeName": "dsDescriptionDate", - "multiple": false, - "typeClass": "primitive", - "value": "1000-02-02" - } - } - ] - }, - { - "typeName": "subject", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Agricultural Sciences", - "Business and Management", - "Engineering", - "Law" - ] - }, - { - "typeName": "keyword", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "keywordValue": { - "typeName": "keywordValue", - "multiple": false, - "typeClass": "primitive", - "value": "KeywordTerm1" - }, - "keywordTermURI": { - "typeName": "keywordTermURI", - "multiple": false, - "typeClass": "primitive", - "value": "http://keywordTermURI1.org" - }, - "keywordVocabulary": { - "typeName": "keywordVocabulary", - "multiple": false, - "typeClass": "primitive", - "value": "KeywordVocabulary1" - }, - "keywordVocabularyURI": { - "typeName": "keywordVocabularyURI", - "multiple": false, - "typeClass": "primitive", - "value": "http://KeywordVocabularyURL1.org" - } - }, - { - "keywordValue": { - "typeName": "keywordValue", - "multiple": false, - "typeClass": "primitive", - "value": "KeywordTerm2" - }, - "keywordTermURI": { - "typeName": "keywordTermURI", - "multiple": false, - "typeClass": "primitive", - "value": "http://keywordTermURI2.org" - }, - "keywordVocabulary": { - "typeName": "keywordVocabulary", - "multiple": false, - "typeClass": "primitive", - "value": "KeywordVocabulary2" - }, - "keywordVocabularyURI": { - "typeName": "keywordVocabularyURI", - "multiple": false, - "typeClass": "primitive", - "value": "http://KeywordVocabularyURL2.org" - } - } - ] - }, - { - "typeName": "topicClassification", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "topicClassValue": { - "typeName": "topicClassValue", - "multiple": false, - "typeClass": "primitive", - "value": "Topic Classification Term1" - }, - "topicClassVocab": { - "typeName": "topicClassVocab", - "multiple": false, - "typeClass": "primitive", - "value": "Topic Classification Vocab1" - }, - "topicClassVocabURI": { - "typeName": "topicClassVocabURI", - "multiple": false, - "typeClass": "primitive", - "value": "https://TopicClassificationURL1.com" - } - }, - { - "topicClassValue": { - "typeName": "topicClassValue", - "multiple": false, - "typeClass": "primitive", - "value": "Topic Classification Term2" - }, - "topicClassVocab": { - "typeName": "topicClassVocab", - "multiple": false, - "typeClass": "primitive", - "value": "Topic Classification Vocab2" - }, - "topicClassVocabURI": { - "typeName": "topicClassVocabURI", - "multiple": false, - "typeClass": "primitive", - "value": "https://TopicClassificationURL2.com" - } - } - ] - }, - { - "typeName": "publication", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "publicationCitation": { - "typeName": "publicationCitation", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationCitation1" - }, - "publicationIDType": { - "typeName": "publicationIDType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "ark" - }, - "publicationIDNumber": { - "typeName": "publicationIDNumber", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationIDNumber1" - }, - "publicationURL": { - "typeName": "publicationURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://RelatedPublicationURL1.org" - } - }, - { - "publicationCitation": { - "typeName": "publicationCitation", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationCitation2" - }, - "publicationIDType": { - "typeName": "publicationIDType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "arXiv" - }, - "publicationIDNumber": { - "typeName": "publicationIDNumber", - "multiple": false, - "typeClass": "primitive", - "value": "RelatedPublicationIDNumber2" - }, - "publicationURL": { - "typeName": "publicationURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://RelatedPublicationURL2.org" - } - } - ] - }, - { - "typeName": "notesText", - "multiple": false, - "typeClass": "primitive", - "value": "Notes1" - }, - { - "typeName": "language", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Abkhaz", - "Afar" - ] - }, - { - "typeName": "producer", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "producerName": { - "typeName": "producerName", - "multiple": false, - "typeClass": "primitive", - "value": "LastProducer1, FirstProducer1" - }, - "producerAffiliation": { - "typeName": "producerAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "ProducerAffiliation1" - }, - "producerAbbreviation": { - "typeName": "producerAbbreviation", - "multiple": false, - "typeClass": "primitive", - "value": "ProducerAbbreviation1" - }, - "producerURL": { - "typeName": "producerURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://ProducerURL1.org" - }, - "producerLogoURL": { - "typeName": "producerLogoURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://ProducerLogoURL1.org" - } - }, - { - "producerName": { - "typeName": "producerName", - "multiple": false, - "typeClass": "primitive", - "value": "LastProducer2, FirstProducer2" - }, - "producerAffiliation": { - "typeName": "producerAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "ProducerAffiliation2" - }, - "producerAbbreviation": { - "typeName": "producerAbbreviation", - "multiple": false, - "typeClass": "primitive", - "value": "ProducerAbbreviation2" - }, - "producerURL": { - "typeName": "producerURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://ProducerURL2.org" - }, - "producerLogoURL": { - "typeName": "producerLogoURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://ProducerLogoURL2.org" - } - } - ] - }, - { - "typeName": "productionDate", - "multiple": false, - "typeClass": "primitive", - "value": "1003-01-01" - }, - { - "typeName": "productionPlace", - "multiple": true, - "typeClass": "primitive", - "value": ["ProductionPlace"] - }, - { - "typeName": "contributor", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "contributorType": { - "typeName": "contributorType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "Data Collector" - }, - "contributorName": { - "typeName": "contributorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastContributor1, FirstContributor1" - } - }, - { - "contributorType": { - "typeName": "contributorType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "Data Curator" - }, - "contributorName": { - "typeName": "contributorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastContributor2, FirstContributor2" - } - } - ] - }, - { - "typeName": "grantNumber", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "grantNumberAgency": { - "typeName": "grantNumberAgency", - "multiple": false, - "typeClass": "primitive", - "value": "GrantInformationGrantAgency1" - }, - "grantNumberValue": { - "typeName": "grantNumberValue", - "multiple": false, - "typeClass": "primitive", - "value": "GrantInformationGrantNumber1" - } - }, - { - "grantNumberAgency": { - "typeName": "grantNumberAgency", - "multiple": false, - "typeClass": "primitive", - "value": "GrantInformationGrantAgency2" - }, - "grantNumberValue": { - "typeName": "grantNumberValue", - "multiple": false, - "typeClass": "primitive", - "value": "GrantInformationGrantNumber2" - } - } - ] - }, - { - "typeName": "distributor", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "distributorName": { - "typeName": "distributorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastDistributor1, FirstDistributor1" - }, - "distributorAffiliation": { - "typeName": "distributorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "DistributorAffiliation1" - }, - "distributorAbbreviation": { - "typeName": "distributorAbbreviation", - "multiple": false, - "typeClass": "primitive", - "value": "DistributorAbbreviation1" - }, - "distributorURL": { - "typeName": "distributorURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://DistributorURL1.org" - }, - "distributorLogoURL": { - "typeName": "distributorLogoURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://DistributorLogoURL1.org" - } - }, - { - "distributorName": { - "typeName": "distributorName", - "multiple": false, - "typeClass": "primitive", - "value": "LastDistributor2, FirstDistributor2" - }, - "distributorAffiliation": { - "typeName": "distributorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "DistributorAffiliation2" - }, - "distributorAbbreviation": { - "typeName": "distributorAbbreviation", - "multiple": false, - "typeClass": "primitive", - "value": "DistributorAbbreviation2" - }, - "distributorURL": { - "typeName": "distributorURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://DistributorURL2.org" - }, - "distributorLogoURL": { - "typeName": "distributorLogoURL", - "multiple": false, - "typeClass": "primitive", - "value": "http://DistributorLogoURL2.org" - } - } - ] - }, - { - "typeName": "distributionDate", - "multiple": false, - "typeClass": "primitive", - "value": "1004-01-01" - }, - { - "typeName": "depositor", - "multiple": false, - "typeClass": "primitive", - "value": "LastDepositor, FirstDepositor" - }, - { - "typeName": "dateOfDeposit", - "multiple": false, - "typeClass": "primitive", - "value": "1002-01-01" - }, - { - "typeName": "timePeriodCovered", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "timePeriodCoveredStart": { - "typeName": "timePeriodCoveredStart", - "multiple": false, - "typeClass": "primitive", - "value": "1005-01-01" - }, - "timePeriodCoveredEnd": { - "typeName": "timePeriodCoveredEnd", - "multiple": false, - "typeClass": "primitive", - "value": "1005-01-02" - } - }, - { - "timePeriodCoveredStart": { - "typeName": "timePeriodCoveredStart", - "multiple": false, - "typeClass": "primitive", - "value": "1005-02-01" - }, - "timePeriodCoveredEnd": { - "typeName": "timePeriodCoveredEnd", - "multiple": false, - "typeClass": "primitive", - "value": "1005-02-02" - } - } - ] - }, - { - "typeName": "dateOfCollection", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "dateOfCollectionStart": { - "typeName": "dateOfCollectionStart", - "multiple": false, - "typeClass": "primitive", - "value": "1006-01-01" - }, - "dateOfCollectionEnd": { - "typeName": "dateOfCollectionEnd", - "multiple": false, - "typeClass": "primitive", - "value": "1006-01-01" - } - }, - { - "dateOfCollectionStart": { - "typeName": "dateOfCollectionStart", - "multiple": false, - "typeClass": "primitive", - "value": "1006-02-01" - }, - "dateOfCollectionEnd": { - "typeName": "dateOfCollectionEnd", - "multiple": false, - "typeClass": "primitive", - "value": "1006-02-02" - } - } - ] - }, - { - "typeName": "kindOfData", - "multiple": true, - "typeClass": "primitive", - "value": [ - "KindOfData1", - "KindOfData2" - ] - }, - { - "typeName": "series", - "multiple": true, - "typeClass": "compound", - "value": [{ - "seriesName": { - "typeName": "seriesName", - "multiple": false, - "typeClass": "primitive", - "value": "SeriesName" - }, - "seriesInformation": { - "typeName": "seriesInformation", - "multiple": false, - "typeClass": "primitive", - "value": "SeriesInformation" - } - }] - }, - { - "typeName": "software", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "softwareName": { - "typeName": "softwareName", - "multiple": false, - "typeClass": "primitive", - "value": "SoftwareName1" - }, - "softwareVersion": { - "typeName": "softwareVersion", - "multiple": false, - "typeClass": "primitive", - "value": "SoftwareVersion1" - } - }, - { - "softwareName": { - "typeName": "softwareName", - "multiple": false, - "typeClass": "primitive", - "value": "SoftwareName2" - }, - "softwareVersion": { - "typeName": "softwareVersion", - "multiple": false, - "typeClass": "primitive", - "value": "SoftwareVersion2" - } - } - ] - }, - { - "typeName": "relatedMaterial", - "multiple": true, - "typeClass": "primitive", - "value": [ - "RelatedMaterial1", - "RelatedMaterial2" - ] - }, - { - "typeName": "relatedDatasets", - "multiple": true, - "typeClass": "primitive", - "value": [ - "RelatedDatasets1", - "RelatedDatasets2" - ] - }, - { - "typeName": "otherReferences", - "multiple": true, - "typeClass": "primitive", - "value": [ - "OtherReferences1", - "OtherReferences2" - ] - }, - { - "typeName": "dataSources", - "multiple": true, - "typeClass": "primitive", - "value": [ - "DataSources1", - "DataSources2" - ] - }, - { - "typeName": "originOfSources", - "multiple": false, - "typeClass": "primitive", - "value": "OriginOfSources" - }, - { - "typeName": "characteristicOfSources", - "multiple": false, - "typeClass": "primitive", - "value": "CharacteristicOfSourcesNoted" - }, - { - "typeName": "accessToSources", - "multiple": false, - "typeClass": "primitive", - "value": "DocumentationAndAccessToSources" - } - ] - }, - "geospatial": { - "displayName": "Geospatial Metadata", - "fields": [ - { - "typeName": "geographicCoverage", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "country": { - "typeName": "country", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "Afghanistan" - }, - "state": { - "typeName": "state", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageStateProvince1" - }, - "city": { - "typeName": "city", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageCity1" - }, - "otherGeographicCoverage": { - "typeName": "otherGeographicCoverage", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageOther1" - } - }, - { - "country": { - "typeName": "country", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "Albania" - }, - "state": { - "typeName": "state", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageStateProvince2" - }, - "city": { - "typeName": "city", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageCity2" - }, - "otherGeographicCoverage": { - "typeName": "otherGeographicCoverage", - "multiple": false, - "typeClass": "primitive", - "value": "GeographicCoverageOther2" - } - } - ] - }, - { - "typeName": "geographicUnit", - "multiple": true, - "typeClass": "primitive", - "value": [ - "GeographicUnit1", - "GeographicUnit2" - ] - }, - { - "typeName": "geographicBoundingBox", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "westLongitude": { - "typeName": "westLongitude", - "multiple": false, - "typeClass": "primitive", - "value": "-72" - }, - "eastLongitude": { - "typeName": "eastLongitude", - "multiple": false, - "typeClass": "primitive", - "value": "-70" - }, - "northLatitude": { - "typeName": "northLatitude", - "multiple": false, - "typeClass": "primitive", - "value": "43" - }, - "southLatitude": { - "typeName": "southLatitude", - "multiple": false, - "typeClass": "primitive", - "value": "42" - } - }, - { - "westLongitude": { - "typeName": "westLongitude", - "multiple": false, - "typeClass": "primitive", - "value": "-18" - }, - "eastLongitude": { - "typeName": "eastLongitude", - "multiple": false, - "typeClass": "primitive", - "value": "-13" - }, - "northLatitude": { - "typeName": "northLatitude", - "multiple": false, - "typeClass": "primitive", - "value": "29" - }, - "southLatitude": { - "typeName": "southLatitude", - "multiple": false, - "typeClass": "primitive", - "value": "28" - } - } - ] - } - ] - }, - "socialscience": { - "displayName": "Social Science and Humanities Metadata", - "fields": [ - { - "typeName": "unitOfAnalysis", - "multiple": true, - "typeClass": "primitive", - "value": [ - "UnitOfAnalysis1", - "UnitOfAnalysis2" - ] - }, - { - "typeName": "universe", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Universe1", - "Universe2" - ] - }, - { - "typeName": "timeMethod", - "multiple": false, - "typeClass": "primitive", - "value": "TimeMethod" - }, - { - "typeName": "dataCollector", - "multiple": false, - "typeClass": "primitive", - "value": "LastDataCollector1, FirstDataCollector1" - }, - { - "typeName": "collectorTraining", - "multiple": false, - "typeClass": "primitive", - "value": "CollectorTraining" - }, - { - "typeName": "frequencyOfDataCollection", - "multiple": false, - "typeClass": "primitive", - "value": "Frequency" - }, - { - "typeName": "samplingProcedure", - "multiple": false, - "typeClass": "primitive", - "value": "SamplingProcedure" - }, - { - "typeName": "targetSampleSize", - "multiple": false, - "typeClass": "compound", - "value": { - "targetSampleActualSize": { - "typeName": "targetSampleActualSize", - "multiple": false, - "typeClass": "primitive", - "value": "100" - }, - "targetSampleSizeFormula": { - "typeName": "targetSampleSizeFormula", - "multiple": false, - "typeClass": "primitive", - "value": "TargetSampleSizeFormula" - } - } - }, - { - "typeName": "deviationsFromSampleDesign", - "multiple": false, - "typeClass": "primitive", - "value": "MajorDeviationsForSampleDesign" - }, - { - "typeName": "collectionMode", - "multiple": true, - "typeClass": "primitive", - "value": ["CollectionMode"] - }, - { - "typeName": "researchInstrument", - "multiple": false, - "typeClass": "primitive", - "value": "TypeOfResearchInstrument" - }, - { - "typeName": "dataCollectionSituation", - "multiple": false, - "typeClass": "primitive", - "value": "CharacteristicsOfDataCollectionSituation" - }, - { - "typeName": "actionsToMinimizeLoss", - "multiple": false, - "typeClass": "primitive", - "value": "ActionsToMinimizeLosses" - }, - { - "typeName": "controlOperations", - "multiple": false, - "typeClass": "primitive", - "value": "ControlOperations" - }, - { - "typeName": "weighting", - "multiple": false, - "typeClass": "primitive", - "value": "Weighting" - }, - { - "typeName": "cleaningOperations", - "multiple": false, - "typeClass": "primitive", - "value": "CleaningOperations" - }, - { - "typeName": "datasetLevelErrorNotes", - "multiple": false, - "typeClass": "primitive", - "value": "StudyLevelErrorNotes" - }, - { - "typeName": "responseRate", - "multiple": false, - "typeClass": "primitive", - "value": "ResponseRate" - }, - { - "typeName": "samplingErrorEstimates", - "multiple": false, - "typeClass": "primitive", - "value": "EstimatesOfSamplingError" - }, - { - "typeName": "otherDataAppraisal", - "multiple": false, - "typeClass": "primitive", - "value": "OtherFormsOfDataAppraisal" - }, - { - "typeName": "socialScienceNotes", - "multiple": false, - "typeClass": "compound", - "value": { - "socialScienceNotesType": { - "typeName": "socialScienceNotesType", - "multiple": false, - "typeClass": "primitive", - "value": "NotesType" - }, - "socialScienceNotesSubject": { - "typeName": "socialScienceNotesSubject", - "multiple": false, - "typeClass": "primitive", - "value": "NotesSubject" - }, - "socialScienceNotesText": { - "typeName": "socialScienceNotesText", - "multiple": false, - "typeClass": "primitive", - "value": "NotesText" - } - } - } - ] - }, - "astrophysics": { - "displayName": "Astronomy and Astrophysics Metadata", - "fields": [ - { - "typeName": "astroType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Image", - "Mosaic", - "EventList", - "Cube" - ] - }, - { - "typeName": "astroFacility", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Facility1", - "Facility2" - ] - }, - { - "typeName": "astroInstrument", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Instrument1", - "Instrument2" - ] - }, - { - "typeName": "astroObject", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Object1", - "Object2" - ] - }, - { - "typeName": "resolution.Spatial", - "multiple": false, - "typeClass": "primitive", - "value": "SpatialResolution" - }, - { - "typeName": "resolution.Spectral", - "multiple": false, - "typeClass": "primitive", - "value": "SpectralResolution" - }, - { - "typeName": "resolution.Temporal", - "multiple": false, - "typeClass": "primitive", - "value": "TimeResolution" - }, - { - "typeName": "coverage.Spectral.Bandpass", - "multiple": true, - "typeClass": "primitive", - "value": [ - "Bandpass1", - "Bandpass2" - ] - }, - { - "typeName": "coverage.Spectral.CentralWavelength", - "multiple": true, - "typeClass": "primitive", - "value": [ - "3001", - "3002" - ] - }, - { - "typeName": "coverage.Spectral.Wavelength", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "coverage.Spectral.MinimumWavelength": { - "typeName": "coverage.Spectral.MinimumWavelength", - "multiple": false, - "typeClass": "primitive", - "value": "4001" - }, - "coverage.Spectral.MaximumWavelength": { - "typeName": "coverage.Spectral.MaximumWavelength", - "multiple": false, - "typeClass": "primitive", - "value": "4002" - } - }, - { - "coverage.Spectral.MinimumWavelength": { - "typeName": "coverage.Spectral.MinimumWavelength", - "multiple": false, - "typeClass": "primitive", - "value": "4003" - }, - "coverage.Spectral.MaximumWavelength": { - "typeName": "coverage.Spectral.MaximumWavelength", - "multiple": false, - "typeClass": "primitive", - "value": "4004" - } - } - ] - }, - { - "typeName": "coverage.Temporal", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "coverage.Temporal.StartTime": { - "typeName": "coverage.Temporal.StartTime", - "multiple": false, - "typeClass": "primitive", - "value": "1007-01-01" - }, - "coverage.Temporal.StopTime": { - "typeName": "coverage.Temporal.StopTime", - "multiple": false, - "typeClass": "primitive", - "value": "1007-01-02" - } - }, - { - "coverage.Temporal.StartTime": { - "typeName": "coverage.Temporal.StartTime", - "multiple": false, - "typeClass": "primitive", - "value": "1007-02-01" - }, - "coverage.Temporal.StopTime": { - "typeName": "coverage.Temporal.StopTime", - "multiple": false, - "typeClass": "primitive", - "value": "1007-02-02" - } - } - ] - }, - { - "typeName": "coverage.Spatial", - "multiple": true, - "typeClass": "primitive", - "value": [ - "SkyCoverage1", - "SkyCoverage2" - ] - }, - { - "typeName": "coverage.Depth", - "multiple": false, - "typeClass": "primitive", - "value": "200" - }, - { - "typeName": "coverage.ObjectDensity", - "multiple": false, - "typeClass": "primitive", - "value": "300" - }, - { - "typeName": "coverage.ObjectCount", - "multiple": false, - "typeClass": "primitive", - "value": "400" - }, - { - "typeName": "coverage.SkyFraction", - "multiple": false, - "typeClass": "primitive", - "value": "500" - }, - { - "typeName": "coverage.Polarization", - "multiple": false, - "typeClass": "primitive", - "value": "Polarization" - }, - { - "typeName": "redshiftType", - "multiple": false, - "typeClass": "primitive", - "value": "RedshiftType" - }, - { - "typeName": "resolution.Redshift", - "multiple": false, - "typeClass": "primitive", - "value": "600" - }, - { - "typeName": "coverage.RedshiftValue", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "coverage.Redshift.MinimumValue": { - "typeName": "coverage.Redshift.MinimumValue", - "multiple": false, - "typeClass": "primitive", - "value": "701" - }, - "coverage.Redshift.MaximumValue": { - "typeName": "coverage.Redshift.MaximumValue", - "multiple": false, - "typeClass": "primitive", - "value": "702" - } - }, - { - "coverage.Redshift.MinimumValue": { - "typeName": "coverage.Redshift.MinimumValue", - "multiple": false, - "typeClass": "primitive", - "value": "703" - }, - "coverage.Redshift.MaximumValue": { - "typeName": "coverage.Redshift.MaximumValue", - "multiple": false, - "typeClass": "primitive", - "value": "704" - } - } - ] - } - ] - }, - "biomedical": { - "displayName": "Life Sciences Metadata", - "fields": [ - { - "typeName": "studyDesignType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Case Control", - "Cross Sectional", - "Cohort Study", - "Not Specified" - ] - }, - { - "typeName": "studyFactorType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Age", - "Biomarkers", - "Cell Surface Markers", - "Developmental Stage" - ] - }, - { - "typeName": "studyAssayOrganism", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Arabidopsis thaliana", - "Bos taurus", - "Caenorhabditis elegans", - "Danio rerio (zebrafish)" - ] - }, - { - "typeName": "studyAssayOtherOrganism", - "multiple": true, - "typeClass": "primitive", - "value": [ - "OtherOrganism1", - "OtherOrganism2" - ] - }, - { - "typeName": "studyAssayMeasurementType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "genome sequencing", - "cell sorting", - "clinical chemistry analysis", - "DNA methylation profiling" - ] - }, - { - "typeName": "studyAssayOtherMeasurmentType", - "multiple": true, - "typeClass": "primitive", - "value": [ - "OtherMeasurementType1", - "OtherMeasurementType2" - ] - }, - { - "typeName": "studyAssayTechnologyType", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "culture based drug susceptibility testing, single concentration", - "culture based drug susceptibility testing, two concentrations", - "culture based drug susceptibility testing, three or more concentrations (minimium inhibitory concentration measurement)", - "flow cytometry" - ] - }, - { - "typeName": "studyAssayPlatform", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "210-MS GC Ion Trap (Varian)", - "220-MS GC Ion Trap (Varian)", - "225-MS GC Ion Trap (Varian)", - "300-MS quadrupole GC/MS (Varian)" - ] - }, - { - "typeName": "studyAssayCellType", - "multiple": true, - "typeClass": "primitive", - "value": [ - "CellType1", - "CellType2" - ] - } - ] - }, - "journal": { - "displayName": "Journal Metadata", - "fields": [ - { - "typeName": "journalVolumeIssue", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "journalVolume": { - "typeName": "journalVolume", - "multiple": false, - "typeClass": "primitive", - "value": "JournalVolume1" - }, - "journalIssue": { - "typeName": "journalIssue", - "multiple": false, - "typeClass": "primitive", - "value": "JournalIssue1" - }, - "journalPubDate": { - "typeName": "journalPubDate", - "multiple": false, - "typeClass": "primitive", - "value": "1008-01-01" - } - }, - { - "journalVolume": { - "typeName": "journalVolume", - "multiple": false, - "typeClass": "primitive", - "value": "JournalVolume2" - }, - "journalIssue": { - "typeName": "journalIssue", - "multiple": false, - "typeClass": "primitive", - "value": "JournalIssue2" - }, - "journalPubDate": { - "typeName": "journalPubDate", - "multiple": false, - "typeClass": "primitive", - "value": "1008-02-01" - } - } - ] - }, - { - "typeName": "journalArticleType", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "abstract" - } - ] - } - } - } -} + "datasetVersion" : { + "license" : { + "name" : "CC0 1.0", + "uri" : "http://creativecommons.org/publicdomain/zero/1.0" + }, + "metadataBlocks" : { + "citation" : { + "displayName" : "Citation Metadata", + "fields" : [{ + "typeName" : "title", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Replication Data for: Title" + }, { + "typeName" : "subtitle", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Subtitle" + }, { + "typeName" : "alternativeTitle", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Alternative Title" + ] + }, { + "typeName" : "alternativeURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://AlternativeURL.org" + }, { + "typeName" : "otherId", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "otherIdAgency" : { + "typeName" : "otherIdAgency", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherIDAgency1" + }, + "otherIdValue" : { + "typeName" : "otherIdValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherIDIdentifier1" + } + }, { + "otherIdAgency" : { + "typeName" : "otherIdAgency", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherIDAgency2" + }, + "otherIdValue" : { + "typeName" : "otherIdValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherIDIdentifier2" + } + } + ] + }, { + "typeName" : "author", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "authorName" : { + "typeName" : "authorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastAuthor1, FirstAuthor1" + }, + "authorAffiliation" : { + "typeName" : "authorAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "AuthorAffiliation1" + }, + "authorIdentifierScheme" : { + "typeName" : "authorIdentifierScheme", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "ORCID" + }, + "authorIdentifier" : { + "typeName" : "authorIdentifier", + "multiple" : false, + "typeClass" : "primitive", + "value" : "AuthorIdentifier1" + } + }, { + "authorName" : { + "typeName" : "authorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastAuthor2, FirstAuthor2" + }, + "authorAffiliation" : { + "typeName" : "authorAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "AuthorAffiliation2" + }, + "authorIdentifierScheme" : { + "typeName" : "authorIdentifierScheme", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "ISNI" + }, + "authorIdentifier" : { + "typeName" : "authorIdentifier", + "multiple" : false, + "typeClass" : "primitive", + "value" : "AuthorIdentifier2" + } + } + ] + }, { + "typeName" : "datasetContact", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "datasetContactName" : { + "typeName" : "datasetContactName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastContact1, FirstContact1" + }, + "datasetContactAffiliation" : { + "typeName" : "datasetContactAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ContactAffiliation1" + }, + "datasetContactEmail" : { + "typeName" : "datasetContactEmail", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ContactEmail1@mailinator.com" + } + }, { + "datasetContactName" : { + "typeName" : "datasetContactName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastContact2, FirstContact2" + }, + "datasetContactAffiliation" : { + "typeName" : "datasetContactAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ContactAffiliation2" + }, + "datasetContactEmail" : { + "typeName" : "datasetContactEmail", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ContactEmail2@mailinator.com" + } + } + ] + }, { + "typeName" : "dsDescription", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "dsDescriptionValue" : { + "typeName" : "dsDescriptionValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DescriptionText1" + }, + "dsDescriptionDate" : { + "typeName" : "dsDescriptionDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1000-01-01" + } + }, { + "dsDescriptionValue" : { + "typeName" : "dsDescriptionValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DescriptionText2" + }, + "dsDescriptionDate" : { + "typeName" : "dsDescriptionDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1000-02-02" + } + } + ] + }, { + "typeName" : "subject", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Agricultural Sciences", + "Business and Management", + "Engineering", + "Law" + ] + }, { + "typeName" : "keyword", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "keywordValue" : { + "typeName" : "keywordValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "KeywordTerm1" + }, + "keywordTermURI" : { + "typeName" : "keywordTermURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://keywordTermURI1.org" + }, + "keywordVocabulary" : { + "typeName" : "keywordVocabulary", + "multiple" : false, + "typeClass" : "primitive", + "value" : "KeywordVocabulary1" + }, + "keywordVocabularyURI" : { + "typeName" : "keywordVocabularyURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://KeywordVocabularyURL1.org" + } + }, { + "keywordValue" : { + "typeName" : "keywordValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "KeywordTerm2" + }, + "keywordTermURI" : { + "typeName" : "keywordTermURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://keywordTermURI2.org" + }, + "keywordVocabulary" : { + "typeName" : "keywordVocabulary", + "multiple" : false, + "typeClass" : "primitive", + "value" : "KeywordVocabulary2" + }, + "keywordVocabularyURI" : { + "typeName" : "keywordVocabularyURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://KeywordVocabularyURL2.org" + } + } + ] + }, { + "typeName" : "topicClassification", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "topicClassValue" : { + "typeName" : "topicClassValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Topic Classification Term1" + }, + "topicClassVocab" : { + "typeName" : "topicClassVocab", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Topic Classification Vocab1" + }, + "topicClassVocabURI" : { + "typeName" : "topicClassVocabURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "https://TopicClassificationURL1.com" + } + }, { + "topicClassValue" : { + "typeName" : "topicClassValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Topic Classification Term2" + }, + "topicClassVocab" : { + "typeName" : "topicClassVocab", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Topic Classification Vocab2" + }, + "topicClassVocabURI" : { + "typeName" : "topicClassVocabURI", + "multiple" : false, + "typeClass" : "primitive", + "value" : "https://TopicClassificationURL2.com" + } + } + ] + }, { + "typeName" : "publication", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "publicationRelationType" : { + "typeName" : "publicationRelationType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "IsSupplementTo" + }, + "publicationCitation" : { + "typeName" : "publicationCitation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RelatedPublicationCitation1" + }, + "publicationIDType" : { + "typeName" : "publicationIDType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "ark" + }, + "publicationIDNumber" : { + "typeName" : "publicationIDNumber", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RelatedPublicationIDNumber1" + }, + "publicationURL" : { + "typeName" : "publicationURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://RelatedPublicationURL1.org" + } + }, { + "publicationCitation" : { + "typeName" : "publicationCitation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RelatedPublicationCitation2" + }, + "publicationIDType" : { + "typeName" : "publicationIDType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "arXiv" + }, + "publicationIDNumber" : { + "typeName" : "publicationIDNumber", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RelatedPublicationIDNumber2" + }, + "publicationURL" : { + "typeName" : "publicationURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://RelatedPublicationURL2.org" + } + } + ] + }, { + "typeName" : "notesText", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Notes1" + }, { + "typeName" : "language", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Abkhaz", + "Afar" + ] + }, { + "typeName" : "producer", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "producerName" : { + "typeName" : "producerName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastProducer1, FirstProducer1" + }, + "producerAffiliation" : { + "typeName" : "producerAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ProducerAffiliation1" + }, + "producerAbbreviation" : { + "typeName" : "producerAbbreviation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ProducerAbbreviation1" + }, + "producerURL" : { + "typeName" : "producerURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://ProducerURL1.org" + }, + "producerLogoURL" : { + "typeName" : "producerLogoURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://ProducerLogoURL1.org" + } + }, { + "producerName" : { + "typeName" : "producerName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastProducer2, FirstProducer2" + }, + "producerAffiliation" : { + "typeName" : "producerAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ProducerAffiliation2" + }, + "producerAbbreviation" : { + "typeName" : "producerAbbreviation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ProducerAbbreviation2" + }, + "producerURL" : { + "typeName" : "producerURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://ProducerURL2.org" + }, + "producerLogoURL" : { + "typeName" : "producerLogoURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://ProducerLogoURL2.org" + } + } + ] + }, { + "typeName" : "productionDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1003-01-01" + }, { + "typeName" : "productionPlace", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "ProductionPlace" + ] + }, { + "typeName" : "contributor", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "contributorType" : { + "typeName" : "contributorType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "Data Collector" + }, + "contributorName" : { + "typeName" : "contributorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastContributor1, FirstContributor1" + } + }, { + "contributorType" : { + "typeName" : "contributorType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "Data Curator" + }, + "contributorName" : { + "typeName" : "contributorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastContributor2, FirstContributor2" + } + } + ] + }, { + "typeName" : "grantNumber", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "grantNumberAgency" : { + "typeName" : "grantNumberAgency", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GrantInformationGrantAgency1" + }, + "grantNumberValue" : { + "typeName" : "grantNumberValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GrantInformationGrantNumber1" + } + }, { + "grantNumberAgency" : { + "typeName" : "grantNumberAgency", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GrantInformationGrantAgency2" + }, + "grantNumberValue" : { + "typeName" : "grantNumberValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GrantInformationGrantNumber2" + } + } + ] + }, { + "typeName" : "distributor", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "distributorName" : { + "typeName" : "distributorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastDistributor1, FirstDistributor1" + }, + "distributorAffiliation" : { + "typeName" : "distributorAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DistributorAffiliation1" + }, + "distributorAbbreviation" : { + "typeName" : "distributorAbbreviation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DistributorAbbreviation1" + }, + "distributorURL" : { + "typeName" : "distributorURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://DistributorURL1.org" + }, + "distributorLogoURL" : { + "typeName" : "distributorLogoURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://DistributorLogoURL1.org" + } + }, { + "distributorName" : { + "typeName" : "distributorName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastDistributor2, FirstDistributor2" + }, + "distributorAffiliation" : { + "typeName" : "distributorAffiliation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DistributorAffiliation2" + }, + "distributorAbbreviation" : { + "typeName" : "distributorAbbreviation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DistributorAbbreviation2" + }, + "distributorURL" : { + "typeName" : "distributorURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://DistributorURL2.org" + }, + "distributorLogoURL" : { + "typeName" : "distributorLogoURL", + "multiple" : false, + "typeClass" : "primitive", + "value" : "http://DistributorLogoURL2.org" + } + } + ] + }, { + "typeName" : "distributionDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1004-01-01" + }, { + "typeName" : "depositor", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastDepositor, FirstDepositor" + }, { + "typeName" : "dateOfDeposit", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1002-01-01" + }, { + "typeName" : "timePeriodCovered", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "timePeriodCoveredStart" : { + "typeName" : "timePeriodCoveredStart", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1005-01-01" + }, + "timePeriodCoveredEnd" : { + "typeName" : "timePeriodCoveredEnd", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1005-01-02" + } + }, { + "timePeriodCoveredStart" : { + "typeName" : "timePeriodCoveredStart", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1005-02-01" + }, + "timePeriodCoveredEnd" : { + "typeName" : "timePeriodCoveredEnd", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1005-02-02" + } + } + ] + }, { + "typeName" : "dateOfCollection", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "dateOfCollectionStart" : { + "typeName" : "dateOfCollectionStart", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1006-01-01" + }, + "dateOfCollectionEnd" : { + "typeName" : "dateOfCollectionEnd", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1006-01-01" + } + }, { + "dateOfCollectionStart" : { + "typeName" : "dateOfCollectionStart", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1006-02-01" + }, + "dateOfCollectionEnd" : { + "typeName" : "dateOfCollectionEnd", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1006-02-02" + } + } + ] + }, { + "typeName" : "kindOfData", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "KindOfData1", + "KindOfData2" + ] + }, { + "typeName" : "series", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "seriesName" : { + "typeName" : "seriesName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SeriesName" + }, + "seriesInformation" : { + "typeName" : "seriesInformation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SeriesInformation" + } + } + ] + }, { + "typeName" : "software", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "softwareName" : { + "typeName" : "softwareName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SoftwareName1" + }, + "softwareVersion" : { + "typeName" : "softwareVersion", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SoftwareVersion1" + } + }, { + "softwareName" : { + "typeName" : "softwareName", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SoftwareName2" + }, + "softwareVersion" : { + "typeName" : "softwareVersion", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SoftwareVersion2" + } + } + ] + }, { + "typeName" : "relatedMaterial", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "RelatedMaterial1", + "RelatedMaterial2" + ] + }, { + "typeName" : "relatedDatasets", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "RelatedDatasets1", + "RelatedDatasets2" + ] + }, { + "typeName" : "otherReferences", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "OtherReferences1", + "OtherReferences2" + ] + }, { + "typeName" : "dataSources", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "DataSources1", + "DataSources2" + ] + }, { + "typeName" : "originOfSources", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OriginOfSources" + }, { + "typeName" : "characteristicOfSources", + "multiple" : false, + "typeClass" : "primitive", + "value" : "CharacteristicOfSourcesNoted" + }, { + "typeName" : "accessToSources", + "multiple" : false, + "typeClass" : "primitive", + "value" : "DocumentationAndAccessToSources" + } + ] + }, + "geospatial" : { + "displayName" : "Geospatial Metadata", + "fields" : [{ + "typeName" : "geographicCoverage", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "country" : { + "typeName" : "country", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "Afghanistan" + }, + "state" : { + "typeName" : "state", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageStateProvince1" + }, + "city" : { + "typeName" : "city", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageCity1" + }, + "otherGeographicCoverage" : { + "typeName" : "otherGeographicCoverage", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageOther1" + } + }, { + "country" : { + "typeName" : "country", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "Albania" + }, + "state" : { + "typeName" : "state", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageStateProvince2" + }, + "city" : { + "typeName" : "city", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageCity2" + }, + "otherGeographicCoverage" : { + "typeName" : "otherGeographicCoverage", + "multiple" : false, + "typeClass" : "primitive", + "value" : "GeographicCoverageOther2" + } + } + ] + }, { + "typeName" : "geographicUnit", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "GeographicUnit1", + "GeographicUnit2" + ] + }, { + "typeName" : "geographicBoundingBox", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "westLongitude" : { + "typeName" : "westLongitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "-72" + }, + "eastLongitude" : { + "typeName" : "eastLongitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "-70" + }, + "northLatitude" : { + "typeName" : "northLatitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "43" + }, + "southLatitude" : { + "typeName" : "southLatitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "42" + } + }, { + "westLongitude" : { + "typeName" : "westLongitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "-18" + }, + "eastLongitude" : { + "typeName" : "eastLongitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "-13" + }, + "northLatitude" : { + "typeName" : "northLatitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "29" + }, + "southLatitude" : { + "typeName" : "southLatitude", + "multiple" : false, + "typeClass" : "primitive", + "value" : "28" + } + } + ] + } + ] + }, + "socialscience" : { + "displayName" : "Social Science and Humanities Metadata", + "fields" : [{ + "typeName" : "unitOfAnalysis", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "UnitOfAnalysis1", + "UnitOfAnalysis2" + ] + }, { + "typeName" : "universe", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Universe1", + "Universe2" + ] + }, { + "typeName" : "timeMethod", + "multiple" : false, + "typeClass" : "primitive", + "value" : "TimeMethod" + }, { + "typeName" : "dataCollector", + "multiple" : false, + "typeClass" : "primitive", + "value" : "LastDataCollector1, FirstDataCollector1" + }, { + "typeName" : "collectorTraining", + "multiple" : false, + "typeClass" : "primitive", + "value" : "CollectorTraining" + }, { + "typeName" : "frequencyOfDataCollection", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Frequency" + }, { + "typeName" : "samplingProcedure", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SamplingProcedure" + }, { + "typeName" : "targetSampleSize", + "multiple" : false, + "typeClass" : "compound", + "value" : { + "targetSampleActualSize" : { + "typeName" : "targetSampleActualSize", + "multiple" : false, + "typeClass" : "primitive", + "value" : "100" + }, + "targetSampleSizeFormula" : { + "typeName" : "targetSampleSizeFormula", + "multiple" : false, + "typeClass" : "primitive", + "value" : "TargetSampleSizeFormula" + } + } + }, { + "typeName" : "deviationsFromSampleDesign", + "multiple" : false, + "typeClass" : "primitive", + "value" : "MajorDeviationsForSampleDesign" + }, { + "typeName" : "collectionMode", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "CollectionMode" + ] + }, { + "typeName" : "researchInstrument", + "multiple" : false, + "typeClass" : "primitive", + "value" : "TypeOfResearchInstrument" + }, { + "typeName" : "dataCollectionSituation", + "multiple" : false, + "typeClass" : "primitive", + "value" : "CharacteristicsOfDataCollectionSituation" + }, { + "typeName" : "actionsToMinimizeLoss", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ActionsToMinimizeLosses" + }, { + "typeName" : "controlOperations", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ControlOperations" + }, { + "typeName" : "weighting", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Weighting" + }, { + "typeName" : "cleaningOperations", + "multiple" : false, + "typeClass" : "primitive", + "value" : "CleaningOperations" + }, { + "typeName" : "datasetLevelErrorNotes", + "multiple" : false, + "typeClass" : "primitive", + "value" : "StudyLevelErrorNotes" + }, { + "typeName" : "responseRate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "ResponseRate" + }, { + "typeName" : "samplingErrorEstimates", + "multiple" : false, + "typeClass" : "primitive", + "value" : "EstimatesOfSamplingError" + }, { + "typeName" : "otherDataAppraisal", + "multiple" : false, + "typeClass" : "primitive", + "value" : "OtherFormsOfDataAppraisal" + }, { + "typeName" : "socialScienceNotes", + "multiple" : false, + "typeClass" : "compound", + "value" : { + "socialScienceNotesType" : { + "typeName" : "socialScienceNotesType", + "multiple" : false, + "typeClass" : "primitive", + "value" : "NotesType" + }, + "socialScienceNotesSubject" : { + "typeName" : "socialScienceNotesSubject", + "multiple" : false, + "typeClass" : "primitive", + "value" : "NotesSubject" + }, + "socialScienceNotesText" : { + "typeName" : "socialScienceNotesText", + "multiple" : false, + "typeClass" : "primitive", + "value" : "NotesText" + } + } + } + ] + }, + "astrophysics" : { + "displayName" : "Astronomy and Astrophysics Metadata", + "fields" : [{ + "typeName" : "astroType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Image", + "Mosaic", + "EventList", + "Cube" + ] + }, { + "typeName" : "astroFacility", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Facility1", + "Facility2" + ] + }, { + "typeName" : "astroInstrument", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Instrument1", + "Instrument2" + ] + }, { + "typeName" : "astroObject", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Object1", + "Object2" + ] + }, { + "typeName" : "resolution.Spatial", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SpatialResolution" + }, { + "typeName" : "resolution.Spectral", + "multiple" : false, + "typeClass" : "primitive", + "value" : "SpectralResolution" + }, { + "typeName" : "resolution.Temporal", + "multiple" : false, + "typeClass" : "primitive", + "value" : "TimeResolution" + }, { + "typeName" : "coverage.Spectral.Bandpass", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "Bandpass1", + "Bandpass2" + ] + }, { + "typeName" : "coverage.Spectral.CentralWavelength", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "3001", + "3002" + ] + }, { + "typeName" : "coverage.Spectral.Wavelength", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "coverage.Spectral.MinimumWavelength" : { + "typeName" : "coverage.Spectral.MinimumWavelength", + "multiple" : false, + "typeClass" : "primitive", + "value" : "4001" + }, + "coverage.Spectral.MaximumWavelength" : { + "typeName" : "coverage.Spectral.MaximumWavelength", + "multiple" : false, + "typeClass" : "primitive", + "value" : "4002" + } + }, { + "coverage.Spectral.MinimumWavelength" : { + "typeName" : "coverage.Spectral.MinimumWavelength", + "multiple" : false, + "typeClass" : "primitive", + "value" : "4003" + }, + "coverage.Spectral.MaximumWavelength" : { + "typeName" : "coverage.Spectral.MaximumWavelength", + "multiple" : false, + "typeClass" : "primitive", + "value" : "4004" + } + } + ] + }, { + "typeName" : "coverage.Temporal", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "coverage.Temporal.StartTime" : { + "typeName" : "coverage.Temporal.StartTime", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1007-01-01" + }, + "coverage.Temporal.StopTime" : { + "typeName" : "coverage.Temporal.StopTime", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1007-01-02" + } + }, { + "coverage.Temporal.StartTime" : { + "typeName" : "coverage.Temporal.StartTime", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1007-02-01" + }, + "coverage.Temporal.StopTime" : { + "typeName" : "coverage.Temporal.StopTime", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1007-02-02" + } + } + ] + }, { + "typeName" : "coverage.Spatial", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "SkyCoverage1", + "SkyCoverage2" + ] + }, { + "typeName" : "coverage.Depth", + "multiple" : false, + "typeClass" : "primitive", + "value" : "200" + }, { + "typeName" : "coverage.ObjectDensity", + "multiple" : false, + "typeClass" : "primitive", + "value" : "300" + }, { + "typeName" : "coverage.ObjectCount", + "multiple" : false, + "typeClass" : "primitive", + "value" : "400" + }, { + "typeName" : "coverage.SkyFraction", + "multiple" : false, + "typeClass" : "primitive", + "value" : "500" + }, { + "typeName" : "coverage.Polarization", + "multiple" : false, + "typeClass" : "primitive", + "value" : "Polarization" + }, { + "typeName" : "redshiftType", + "multiple" : false, + "typeClass" : "primitive", + "value" : "RedshiftType" + }, { + "typeName" : "resolution.Redshift", + "multiple" : false, + "typeClass" : "primitive", + "value" : "600" + }, { + "typeName" : "coverage.RedshiftValue", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "coverage.Redshift.MinimumValue" : { + "typeName" : "coverage.Redshift.MinimumValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "701" + }, + "coverage.Redshift.MaximumValue" : { + "typeName" : "coverage.Redshift.MaximumValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "702" + } + }, { + "coverage.Redshift.MinimumValue" : { + "typeName" : "coverage.Redshift.MinimumValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "703" + }, + "coverage.Redshift.MaximumValue" : { + "typeName" : "coverage.Redshift.MaximumValue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "704" + } + } + ] + } + ] + }, + "biomedical" : { + "displayName" : "Life Sciences Metadata", + "fields" : [{ + "typeName" : "studyDesignType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Case Control", + "Cross Sectional", + "Cohort Study", + "Not Specified" + ] + }, { + "typeName" : "studyFactorType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Age", + "Biomarkers", + "Cell Surface Markers", + "Developmental Stage" + ] + }, { + "typeName" : "studyAssayOrganism", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "Arabidopsis thaliana", + "Bos taurus", + "Caenorhabditis elegans", + "Danio rerio (zebrafish)" + ] + }, { + "typeName" : "studyAssayOtherOrganism", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "OtherOrganism1", + "OtherOrganism2" + ] + }, { + "typeName" : "studyAssayMeasurementType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "genome sequencing", + "cell sorting", + "clinical chemistry analysis", + "DNA methylation profiling" + ] + }, { + "typeName" : "studyAssayOtherMeasurmentType", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "OtherMeasurementType1", + "OtherMeasurementType2" + ] + }, { + "typeName" : "studyAssayTechnologyType", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "culture based drug susceptibility testing, single concentration", + "culture based drug susceptibility testing, two concentrations", + "culture based drug susceptibility testing, three or more concentrations (minimium inhibitory concentration measurement)", + "flow cytometry" + ] + }, { + "typeName" : "studyAssayPlatform", + "multiple" : true, + "typeClass" : "controlledVocabulary", + "value" : [ + "210-MS GC Ion Trap (Varian)", + "220-MS GC Ion Trap (Varian)", + "225-MS GC Ion Trap (Varian)", + "300-MS quadrupole GC/MS (Varian)" + ] + }, { + "typeName" : "studyAssayCellType", + "multiple" : true, + "typeClass" : "primitive", + "value" : [ + "CellType1", + "CellType2" + ] + } + ] + }, + "journal" : { + "displayName" : "Journal Metadata", + "fields" : [{ + "typeName" : "journalVolumeIssue", + "multiple" : true, + "typeClass" : "compound", + "value" : [{ + "journalVolume" : { + "typeName" : "journalVolume", + "multiple" : false, + "typeClass" : "primitive", + "value" : "JournalVolume1" + }, + "journalIssue" : { + "typeName" : "journalIssue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "JournalIssue1" + }, + "journalPubDate" : { + "typeName" : "journalPubDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1008-01-01" + } + }, { + "journalVolume" : { + "typeName" : "journalVolume", + "multiple" : false, + "typeClass" : "primitive", + "value" : "JournalVolume2" + }, + "journalIssue" : { + "typeName" : "journalIssue", + "multiple" : false, + "typeClass" : "primitive", + "value" : "JournalIssue2" + }, + "journalPubDate" : { + "typeName" : "journalPubDate", + "multiple" : false, + "typeClass" : "primitive", + "value" : "1008-02-01" + } + } + ] + }, { + "typeName" : "journalArticleType", + "multiple" : false, + "typeClass" : "controlledVocabulary", + "value" : "abstract" + } + ] + } + } + } +} \ No newline at end of file diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 18354f2b1f7..db4f1c9f30c 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -31,55 +31,56 @@ topicClassVocab Controlled Vocabulary Name The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) text 27 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation topicClassVocabURI Controlled Vocabulary URL The URL where one can access information about the term's controlled vocabulary https:// url 28 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE topicClassification citation publication Related Publication The article or report that uses the data in the Dataset. The full list of related publications will be displayed on the metadata tab none 29 FALSE FALSE TRUE FALSE TRUE FALSE citation http://purl.org/dc/terms/isReferencedBy - publicationCitation Citation The full bibliographic citation for the related publication textbox 30 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation - publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 31 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme - publicationIDNumber Identifier The identifier for a related publication text 32 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier - publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage https:// url 33 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution - notesText Notes Additional information about the Dataset textbox 34 FALSE FALSE FALSE FALSE TRUE FALSE citation - language Language A language that the Dataset's files is written in text 35 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language - producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 36 FALSE FALSE TRUE FALSE FALSE FALSE citation - producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 37 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation - producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 38 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 39 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerURL URL The URL of the producer's website https:// url 40 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation - producerLogoURL Logo URL The URL of the producer's logo https:// url 41
FALSE FALSE FALSE FALSE FALSE FALSE producer citation - productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 42 TRUE FALSE FALSE TRUE FALSE FALSE citation - productionPlace Production Location The location where the data and any related materials were produced or collected text 43 TRUE FALSE TRUE TRUE FALSE FALSE citation - contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 44 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor - contributorType Type Indicates the type of contribution made to the dataset text 45 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation - contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 46 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation - grantNumber Funding Information Information about the Dataset's financial support none 47 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor - grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 48 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 49 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation - distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 50 FALSE FALSE TRUE FALSE FALSE FALSE citation - distributorName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 51 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation - distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 52 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 53 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorURL URL The URL of the distributor's webpage https:// url 54 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 55
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation - distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 56 TRUE FALSE FALSE TRUE FALSE FALSE citation - depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 57 FALSE FALSE FALSE FALSE FALSE FALSE citation - dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 58 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted - timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 59 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage - timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 60 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation - dateOfCollection Date of Collection The dates when the data were collected or generated none 62 ; FALSE FALSE TRUE FALSE FALSE FALSE citation - dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 63 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 64 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation - kindOfData Data Type The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) text 65 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData - series Series Information about the dataset series to which the Dataset belong none 66 : FALSE FALSE TRUE FALSE FALSE FALSE citation - seriesName Name The name of the dataset series text 67 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation - seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 68 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation - software Software Information about the software used to generate the Dataset none 69 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy - softwareName Name The name of software used to generate the Dataset text 70 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - softwareVersion Version The version of the software used to generate the Dataset, e.g. 4.11 text 71 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation - relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 72 FALSE FALSE TRUE FALSE FALSE FALSE citation - relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 73 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation - otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 74 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references - dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. a book, article, serial, or machine-readable data file) textbox 75 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom - originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 76 FALSE FALSE FALSE FALSE FALSE FALSE citation - characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation - accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 78 FALSE FALSE FALSE FALSE FALSE FALSE citation + publicationRelationType Relation Type The nature of the relationship between this Dataset and the related publication text 30 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://datacite.org/schema/kernel-4/simpleTypes#relationType + publicationCitation Citation The full bibliographic citation for the related publication textbox 31 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/dc/terms/bibliographicCitation + publicationIDType Identifier Type The type of identifier that uniquely identifies a related publication text 32 #VALUE: TRUE TRUE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifierScheme + publicationIDNumber Identifier The identifier for a related publication text 33 #VALUE TRUE FALSE FALSE FALSE TRUE FALSE publication citation http://purl.org/spar/datacite/ResourceIdentifier + publicationURL URL The URL form of the identifier entered in the Identifier field, e.g. the DOI URL if a DOI was entered in the Identifier field. Used to display what was entered in the ID Type and ID Number fields as a link. If what was entered in the Identifier field has no URL form, the URL of the publication webpage is used, e.g. a journal article webpage https:// url 34 #VALUE FALSE FALSE FALSE FALSE TRUE FALSE publication citation https://schema.org/distribution + notesText Notes Additional information about the Dataset textbox 35 FALSE FALSE FALSE FALSE TRUE FALSE citation + language Language A language that the Dataset's files is written in text 36 TRUE TRUE TRUE TRUE FALSE FALSE citation http://purl.org/dc/terms/language + producer Producer The entity, such a person or organization, managing the finances or other administrative processes involved in the creation of the Dataset none 37 FALSE FALSE TRUE FALSE FALSE FALSE citation + producerName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 38 #VALUE TRUE FALSE FALSE TRUE FALSE TRUE producer citation + producerAffiliation Affiliation The name of the entity affiliated with the producer, e.g. an organization's name Organization XYZ text 39 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerAbbreviation Abbreviated Name The producer's abbreviated name (e.g. IQSS, ICPSR) text 40 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerURL URL The URL of the producer's website https:// url 41 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE producer citation + producerLogoURL Logo URL The URL of the producer's logo https:// url 42
FALSE FALSE FALSE FALSE FALSE FALSE producer citation + productionDate Production Date The date when the data were produced (not distributed, published, or archived) YYYY-MM-DD date 43 TRUE FALSE FALSE TRUE FALSE FALSE citation + productionPlace Production Location The location where the data and any related materials were produced or collected text 44 TRUE FALSE TRUE TRUE FALSE FALSE citation + contributor Contributor The entity, such as a person or organization, responsible for collecting, managing, or otherwise contributing to the development of the Dataset none 45 : FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/contributor + contributorType Type Indicates the type of contribution made to the dataset text 46 #VALUE TRUE TRUE FALSE TRUE FALSE FALSE contributor citation + contributorName Name The name of the contributor, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 47 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE contributor citation + grantNumber Funding Information Information about the Dataset's financial support none 48 : FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/sponsor + grantNumberAgency Agency The agency that provided financial support for the Dataset Organization XYZ text 49 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + grantNumberValue Identifier The grant identifier or contract identifier of the agency that provided financial support for the Dataset text 50 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE grantNumber citation + distributor Distributor The entity, such as a person or organization, designated to generate copies of the Dataset, including any editions or revisions none 51 FALSE FALSE TRUE FALSE FALSE FALSE citation + distributorName Name The name of the entity, e.g. the person's name or the name of an organization 1) FamilyName, GivenName or 2) Organization text 52 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE distributor citation + distributorAffiliation Affiliation The name of the entity affiliated with the distributor, e.g. an organization's name Organization XYZ text 53 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorAbbreviation Abbreviated Name The distributor's abbreviated name (e.g. IQSS, ICPSR) text 54 (#VALUE) FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorURL URL The URL of the distributor's webpage https:// url 55 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributorLogoURL Logo URL The URL of the distributor's logo image, used to show the image on the Dataset's page https:// url 56
FALSE FALSE FALSE FALSE FALSE FALSE distributor citation + distributionDate Distribution Date The date when the Dataset was made available for distribution/presentation YYYY-MM-DD date 57 TRUE FALSE FALSE TRUE FALSE FALSE citation + depositor Depositor The entity, such as a person or organization, that deposited the Dataset in the repository 1) FamilyName, GivenName or 2) Organization text 58 FALSE FALSE FALSE FALSE FALSE FALSE citation + dateOfDeposit Deposit Date The date when the Dataset was deposited into the repository YYYY-MM-DD date 59 FALSE FALSE FALSE TRUE FALSE FALSE citation http://purl.org/dc/terms/dateSubmitted + timePeriodCovered Time Period The time period that the data refer to. Also known as span. This is the time period covered by the data, not the dates of coding, collecting data, or making documents machine-readable none 60 ; FALSE FALSE TRUE FALSE FALSE FALSE citation https://schema.org/temporalCoverage + timePeriodCoveredStart Start Date The start date of the time period that the data refer to YYYY-MM-DD date 61 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + timePeriodCoveredEnd End Date The end date of the time period that the data refer to YYYY-MM-DD date 62 #NAME: #VALUE TRUE FALSE FALSE TRUE FALSE FALSE timePeriodCovered citation + dateOfCollection Date of Collection The dates when the data were collected or generated none 63 ; FALSE FALSE TRUE FALSE FALSE FALSE citation + dateOfCollectionStart Start Date The date when the data collection started YYYY-MM-DD date 64 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + dateOfCollectionEnd End Date The date when the data collection ended YYYY-MM-DD date 65 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE dateOfCollection citation + kindOfData Data Type The type of data included in the files (e.g. survey data, clinical data, or machine-readable text) text 66 TRUE FALSE TRUE TRUE FALSE FALSE citation http://rdf-vocabulary.ddialliance.org/discovery#kindOfData + series Series Information about the dataset series to which the Dataset belong none 67 : FALSE FALSE TRUE FALSE FALSE FALSE citation + seriesName Name The name of the dataset series text 68 #VALUE TRUE FALSE FALSE TRUE FALSE FALSE series citation + seriesInformation Information Can include 1) a history of the series and 2) a summary of features that apply to the series textbox 69 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE series citation + software Software Information about the software used to generate the Dataset none 70 , FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasGeneratedBy + softwareName Name The name of software used to generate the Dataset text 71 #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + softwareVersion Version The version of the software used to generate the Dataset, e.g. 4.11 text 72 #NAME: #VALUE FALSE FALSE FALSE FALSE FALSE FALSE software citation + relatedMaterial Related Material Information, such as a persistent ID or citation, about the material related to the Dataset, such as appendices or sampling information available outside of the Dataset textbox 73 FALSE FALSE TRUE FALSE FALSE FALSE citation + relatedDatasets Related Dataset Information, such as a persistent ID or citation, about a related dataset, such as previous research on the Dataset's subject textbox 74 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/relation + otherReferences Other Reference Information, such as a persistent ID or citation, about another type of resource that provides background or supporting material to the Dataset text 75 FALSE FALSE TRUE FALSE FALSE FALSE citation http://purl.org/dc/terms/references + dataSources Data Source Information, such as a persistent ID or citation, about sources of the Dataset (e.g. a book, article, serial, or machine-readable data file) textbox 76 FALSE FALSE TRUE FALSE FALSE FALSE citation https://www.w3.org/TR/prov-o/#wasDerivedFrom + originOfSources Origin of Historical Sources For historical sources, the origin and any rules followed in establishing them as sources textbox 77 FALSE FALSE FALSE FALSE FALSE FALSE citation + characteristicOfSources Characteristic of Sources Characteristics not already noted elsewhere textbox 78 FALSE FALSE FALSE FALSE FALSE FALSE citation + accessToSources Documentation and Access to Sources 1) Methods or procedures for accessing data sources and 2) any special permissions needed for access textbox 79 FALSE FALSE FALSE FALSE FALSE FALSE citation #controlledVocabulary DatasetField Value identifier displayOrder subject Agricultural Sciences D01 0 subject Arts and Humanities D0 1 @@ -325,3 +326,9 @@ language Zhuang, Chuang zha 183 zha za Zhuang Chuang language Zulu zul 184 zul zu language Not applicable 185 + publicationRelationType IsCitedBy RT1 1 + publicationRelationType Cites RT2 2 + publicationRelationType IsSupplementTo RT3 3 + publicationRelationType IsSupplementedBy RT4 4 + publicationRelationType IsReferencedBy RT5 5 + publicationRelationType References RT6 6 \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java index 1313f3415ab..53ab6c7bef7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetFieldConstant.java @@ -40,6 +40,7 @@ public class DatasetFieldConstant implements java.io.Serializable { public final static String note = "note"; + public final static String publicationRelationType = "publicationRelationType"; public final static String publicationCitation = "publicationCitation"; public final static String publicationIDType = "publicationIDType"; public final static String publicationIDNumber = "publicationIDNumber"; diff --git a/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java index 6b7cb844f3e..e74a2f26af6 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/dublincore/DublinCoreExportUtil.java @@ -28,6 +28,8 @@ import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; +import org.apache.commons.lang3.StringUtils; + /** * * @author skraffmi @@ -301,26 +303,35 @@ private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO String IDType = ""; String IDNo = ""; String url = ""; + String relationType = null; for (Iterator iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); - if (DatasetFieldConstant.publicationCitation.equals(next.getTypeName())) { - citation = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.publicationIDType.equals(next.getTypeName())) { - IDType = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.publicationIDNumber.equals(next.getTypeName())) { - IDNo = next.getSinglePrimitive(); - } - if (DatasetFieldConstant.publicationURL.equals(next.getTypeName())) { - url = next.getSinglePrimitive(); + switch (next.getTypeName()) { + case DatasetFieldConstant.publicationCitation: + citation = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationIDType: + IDType = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationIDNumber: + IDNo = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationURL: + url = next.getSinglePrimitive(); + break; + case DatasetFieldConstant.publicationRelationType: + relationType = next.getSinglePrimitive(); + break; } } + if(StringUtils.isBlank(relationType)) { + relationType = "isReferencedBy"; + } pubString = appendCommaSeparatedValue(citation, IDType); pubString = appendCommaSeparatedValue(pubString, IDNo); pubString = appendCommaSeparatedValue(pubString, url); if (!pubString.isEmpty()){ - xmlw.writeStartElement(dcFlavor+":"+"isReferencedBy"); + xmlw.writeStartElement(dcFlavor+":" + relationType); xmlw.writeCharacters(pubString); xmlw.writeEndElement(); //relPubl } diff --git a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java index b4b5e597365..dd01750942d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/export/openaire/OpenAireExportUtil.java @@ -932,6 +932,7 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe String relatedIdentifierType = null; String relatedIdentifier = null; // is used when relatedIdentifierType variable is not URL String relatedURL = null; // is used when relatedIdentifierType variable is URL + String relationType = null; // is used when relatedIdentifierType variable is URL for (Iterator iterator = fieldDTOs.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); @@ -944,6 +945,9 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe if (DatasetFieldConstant.publicationURL.equals(next.getTypeName())) { relatedURL = next.getSinglePrimitive(); } + if (DatasetFieldConstant.publicationRelationType.equals(next.getTypeName())) { + relationType = next.getSinglePrimitive(); + } } if (StringUtils.isNotBlank(relatedIdentifierType)) { @@ -956,7 +960,10 @@ public static void writeRelatedIdentifierElement(XMLStreamWriter xmlw, DatasetVe } relatedIdentifier_map.put("relatedIdentifierType", relatedIdentifierType); - relatedIdentifier_map.put("relationType", "IsCitedBy"); + if(relationType== null) { + relationType = "IsCitedBy"; + } + relatedIdentifier_map.put("relationType", relationType); if (StringUtils.containsIgnoreCase(relatedIdentifierType, "url")) { writeFullElement(xmlw, null, "relatedIdentifier", relatedIdentifier_map, relatedURL, language); diff --git a/src/main/java/propertyFiles/citation.properties b/src/main/java/propertyFiles/citation.properties index cb864eb78e9..a52a599cff3 100644 --- a/src/main/java/propertyFiles/citation.properties +++ b/src/main/java/propertyFiles/citation.properties @@ -31,6 +31,7 @@ datasetfieldtype.topicClassValue.title=Term datasetfieldtype.topicClassVocab.title=Controlled Vocabulary Name datasetfieldtype.topicClassVocabURI.title=Controlled Vocabulary URL datasetfieldtype.publication.title=Related Publication +datasetfieldtype.publicationRelationType.title=Relation Type datasetfieldtype.publicationCitation.title=Citation datasetfieldtype.publicationIDType.title=Identifier Type datasetfieldtype.publicationIDNumber.title=Identifier @@ -110,6 +111,7 @@ datasetfieldtype.topicClassValue.description=A topic or subject term datasetfieldtype.topicClassVocab.description=The controlled vocabulary used for the keyword term (e.g. LCSH, MeSH) datasetfieldtype.topicClassVocabURI.description=The URL where one can access information about the term's controlled vocabulary datasetfieldtype.publication.description=The article or report that uses the data in the Dataset. The full list of related publications will be displayed on the metadata tab +datasetfieldtype.publicationRelationType.description=The nature of the relationship between this Dataset and the related publication datasetfieldtype.publicationCitation.description=The full bibliographic citation for the related publication datasetfieldtype.publicationIDType.description=The type of identifier that uniquely identifies a related publication datasetfieldtype.publicationIDNumber.description=The identifier for a related publication diff --git a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java index 2da15147255..8350c5d9875 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/export/OpenAireExportUtilTest.java @@ -617,7 +617,7 @@ public void testWriteRelatedIdentifierElement() throws XMLStreamException, IOExc //then assertEquals("" - + "" + + "" + "RelatedPublicationIDNumber1" + "" + "RelatedPublicationIDNumber2" From 93faadeb7aaa8093369d327cc0394c49ee090850 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 14 Jun 2024 13:24:58 -0400 Subject: [PATCH 051/105] missing element for openaireutil test --- .../harvard/iq/dataverse/export/dataset-all-defaults.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt index 431f069cb03..6b3c7689bbf 100644 --- a/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt +++ b/src/test/java/edu/harvard/iq/dataverse/export/dataset-all-defaults.txt @@ -324,6 +324,12 @@ "typeClass": "compound", "value": [ { + "publicationRelationType": { + "typeName": "publicationRelationType", + "multiple": false, + "typeClass": "controlledVocabulary", + "value": "IsSupplementTo" + }, "publicationCitation": { "typeName": "publicationCitation", "multiple": false, From c9084e3058045fb54fd960a203588f371ce59b2c Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 14 Jun 2024 13:57:02 -0400 Subject: [PATCH 052/105] contributor type null fix --- .../iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 9ba1e4e3116..92bcf8b481f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -546,7 +546,10 @@ private void writeContributors(XMLStreamWriter xmlw, DvObject dvObject) throws X contributor = subField.getValue(); break; case DatasetFieldConstant.contributorType: - contributorType = subField.getValue().replace(" ", ""); + contributorType = subField.getValue(); + if(contributorType!=null) { + contributorType = contributorType.replace(" ", ""); + } break; } } From cdd6d6fb4357fe2e63dbf597faf57d59c8625670 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 14 Jun 2024 16:05:40 -0400 Subject: [PATCH 053/105] add relationType to base code and DataCite XML --- .../iq/dataverse/DatasetRelPublication.java | 29 ++++++++++----- .../harvard/iq/dataverse/DatasetVersion.java | 37 +++++++++++++------ .../pidproviders/doi/XmlMetadataTemplate.java | 12 ++++-- 3 files changed, 53 insertions(+), 25 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java b/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java index 7680ebc16db..a0696ab38d9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetRelPublication.java @@ -6,7 +6,6 @@ package edu.harvard.iq.dataverse; - /** * * @author skraffmiller @@ -25,10 +24,12 @@ public class DatasetRelPublication { private String description; private boolean replicationData; private int displayOrder; + private String relationType; public int getDisplayOrder() { return displayOrder; } + public void setDisplayOrder(int displayOrder) { this.displayOrder = displayOrder; } @@ -64,8 +65,7 @@ public String getUrl() { public void setUrl(String url) { this.url = url; } - - + public String getTitle() { return title; } @@ -82,12 +82,21 @@ public void setDescription(String description) { this.description = description; } - public boolean isEmpty() { - return ((text==null || text.trim().equals("")) - && (!replicationData) - && (idType==null || idType.trim().equals("")) - && (idNumber==null || idNumber.trim().equals("")) - && (url==null || url.trim().equals(""))); - } + public void setRelationType(String type) { + relationType = type; + + } + + public String getRelationType() { + return relationType; + } + + public boolean isEmpty() { + return ((text == null || text.trim().equals("")) + && (!replicationData) + && (idType == null || idType.trim().equals("")) + && (idNumber == null || idNumber.trim().equals("")) + && (url == null || url.trim().equals(""))); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 6648419216d..eb6fdd4e923 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1356,24 +1356,37 @@ public List getRelatedPublications() { for (DatasetFieldCompoundValue publication : dsf.getDatasetFieldCompoundValues()) { DatasetRelPublication relatedPublication = new DatasetRelPublication(); for (DatasetField subField : publication.getChildDatasetFields()) { - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationCitation)) { - String citation = subField.getDisplayValue(); - relatedPublication.setText(citation); - } - - - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) { - // We have to avoid using subField.getDisplayValue() here - because the DisplayFormatType - // for this url metadata field is likely set up so that the display value is automatically - // turned into a clickable HTML HREF block, which we don't want to end in our Schema.org JSON-LD output. - // So we want to use the raw value of the field instead, with - // minimal HTML sanitation, just in case (this would be done on all URLs in getDisplayValue()). + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.publicationCitation: + relatedPublication.setText(subField.getDisplayValue()); + break; + case DatasetFieldConstant.publicationURL: + // We have to avoid using subField.getDisplayValue() here - because the DisplayFormatType + // for this url metadata field is likely set up so that the display value is automatically + // turned into a clickable HTML HREF block, which we don't want to end in our Schema.org + // JSON-LD output. So we want to use the raw value of the field instead, with minimal HTML + // sanitation, just in case (this would be done on all URLs in getDisplayValue()). String url = subField.getValue(); if (StringUtils.isBlank(url) || DatasetField.NA_VALUE.equals(url)) { relatedPublication.setUrl(""); } else { relatedPublication.setUrl(MarkupChecker.sanitizeBasicHTML(url)); } + break; + case DatasetFieldConstant.publicationIDType: + // QDR idType has a trailing : now (Aug 2021) + // Get sanitized value without any display modifications + subField.getDatasetFieldType().setDisplayFormat("#VALUE"); + relatedPublication.setIdType(subField.getDisplayValue()); + break; + case DatasetFieldConstant.publicationIDNumber: + // Get sanitized value without any display modifications + subField.getDatasetFieldType().setDisplayFormat("#VALUE"); + relatedPublication.setIdNumber(subField.getDisplayValue()); + break; + case DatasetFieldConstant.publicationRelationType: + relatedPublication.setRelationType(subField.getDisplayValue()); + break; } } relatedPublications.add(relatedPublication); diff --git a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java index 92bcf8b481f..03d4de99691 100644 --- a/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java +++ b/src/main/java/edu/harvard/iq/dataverse/pidproviders/doi/XmlMetadataTemplate.java @@ -4,6 +4,8 @@ import java.io.IOException; import java.io.OutputStream; import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.text.ParseException; import java.util.ArrayList; @@ -872,6 +874,10 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th String pubIdType = relatedPub.getIdType(); String identifier = relatedPub.getIdNumber(); String url = relatedPub.getUrl(); + String relationType = relatedPub.getRelationType(); + if(StringUtils.isBlank(relationType)) { + relationType = "IsSupplementTo"; + } /* * Note - with identifier and url fields, it's not clear that there's a single * way those two fields are used for all identifier types. The code here is @@ -921,13 +927,13 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th // For non-URL types, if a URL is given, split the string to get a schemeUri try { - URL relatedUrl = new URL(relatedIdentifier); + URL relatedUrl = new URI(relatedIdentifier).toURL(); String protocol = relatedUrl.getProtocol(); String authority = relatedUrl.getAuthority(); String site = String.format("%s://%s", protocol, authority); relatedIdentifier = relatedIdentifier.substring(site.length()); attributes.put("schemeURI", site); - } catch (MalformedURLException e) { + } catch (URISyntaxException | MalformedURLException e) { // Just an identifier } } @@ -937,7 +943,7 @@ private void writeRelatedIdentifiers(XMLStreamWriter xmlw, DvObject dvObject) th if (pubIdType != null) { attributes.put("relatedIdentifierType", pubIdType); } - attributes.put("relationType", "IsSupplementTo"); + attributes.put("relationType", relationType); relatedIdentifiersWritten = XmlWriterUtil.writeOpenTagIfNeeded(xmlw, "relatedIdentifiers", relatedIdentifiersWritten); XmlWriterUtil.writeFullElementWithAttributes(xmlw, "relatedIdentifier", attributes, relatedIdentifier); } From 360d3fac049239cfc4f41b6be83b8ecc0b16b475 Mon Sep 17 00:00:00 2001 From: Jim Myers Date: Fri, 14 Jun 2024 16:05:57 -0400 Subject: [PATCH 054/105] add relationType to above fold display --- .../iq/dataverse/DatasetVersionUI.java | 32 +++++++++++++------ src/main/webapp/dataset.xhtml | 6 ++-- 2 files changed, 27 insertions(+), 11 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java index 55b98c178bb..975de391d8c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersionUI.java @@ -62,14 +62,14 @@ public void setMetadataBlocksForEdit(TreeMap> public DatasetVersionUI initDatasetVersionUI(DatasetVersion datasetVersion, boolean createBlanks) { /*takes in the values of a dataset version and apportions them into lists for - viewing and editng in the dataset page. + viewing and editing in the dataset page. */ setDatasetVersion(datasetVersion); //this.setDatasetAuthors(new ArrayList()); this.setDatasetRelPublications(new ArrayList<>()); - // loop through vaues to get fields for view mode + // loop through values to get fields for view mode for (DatasetField dsf : datasetVersion.getDatasetFields()) { //Special Handling for various fields displayed above tabs in dataset page view. if (dsf.getDatasetFieldType().getName().equals(DatasetFieldConstant.title)) { @@ -114,17 +114,23 @@ public DatasetVersionUI initDatasetVersionUI(DatasetVersion datasetVersion, boo datasetRelPublication.setTitle(dsf.getDatasetFieldType().getLocaleTitle()); datasetRelPublication.setDescription(dsf.getDatasetFieldType().getLocaleDescription()); for (DatasetField subField : relPubVal.getChildDatasetFields()) { - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationCitation)) { + String value = subField.getValue(); + switch (subField.getDatasetFieldType().getName()) { + case DatasetFieldConstant.publicationCitation: datasetRelPublication.setText(subField.getValue()); - } - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationIDNumber)) { + break; + case DatasetFieldConstant.publicationIDNumber: datasetRelPublication.setIdNumber(subField.getValue()); - } - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationIDType)) { + break; + case DatasetFieldConstant.publicationIDType: datasetRelPublication.setIdType(subField.getValue()); - } - if (subField.getDatasetFieldType().getName().equals(DatasetFieldConstant.publicationURL)) { + break; + case DatasetFieldConstant.publicationURL: datasetRelPublication.setUrl(subField.getValue()); + break; + case DatasetFieldConstant.publicationRelationType: + datasetRelPublication.setRelationType(subField.getValue()); + break; } } this.getDatasetRelPublications().add(datasetRelPublication); @@ -263,6 +269,14 @@ public String getRelPublicationUrl() { } } + public String getRelPublicationRelationType() { + if (!this.datasetRelPublications.isEmpty()) { + return this.getDatasetRelPublications().get(0).getRelationType(); + } else { + return ""; + } + } + public String getUNF() { //todo get UNF to calculate and display here. return ""; diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 13faf9d7f20..4fd91f24c36 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -631,6 +631,7 @@ or !empty DatasetPage.datasetVersionUI.keywordDisplay or !empty DatasetPage.datasetVersionUI.subject.value or !empty DatasetPage.datasetVersionUI.relPublicationCitation + or !empty DatasetPage.datasetVersionUI.relPublicationUrl or !empty DatasetPage.datasetVersionUI.notes.value) and !empty DatasetPage.datasetSummaryFields}"> @@ -650,8 +651,9 @@ data-toggle="tooltip" data-placement="auto right" data-original-title="#{DatasetPage.datasetVersionUI.datasetRelPublications.get(0).description}">