Skip to content

Commit

Permalink
starting point for DCM APIs #3725
Browse files Browse the repository at this point in the history
  • Loading branch information
pdurbin committed May 16, 2017
1 parent 51f2ba6 commit 98fdfae
Show file tree
Hide file tree
Showing 8 changed files with 479 additions and 1 deletion.
42 changes: 42 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/Dataset.java
Original file line number Diff line number Diff line change
Expand Up @@ -724,4 +724,46 @@ public DatasetThumbnail getDatasetThumbnail() {
return DatasetUtil.getThumbnail(this);
}

/**
* The Data Capture Module provides an rsync script for uploading data.
* Dataverse presents the script to the user for download instead of the
* usual "Upload Files" option. Yes, we are effectively telling the user,
* "To upload, you must first download." :)
*
* NOTE(review): the field is currently annotated {@code @Transient}
* (presumably the JPA annotation -- confirm against the imports), so the
* script would only live on the in-memory object and not in the database.
*
* FIXME: Is this the right place to store the rsync script? Should we only
* store a URL instead? If the script does stay here, make the field
* non-Transient, e.g. by switching to the commented-out {@code @Column}
* mapping below.
*/
// @Column(columnDefinition = "TEXT", nullable = true)
@Transient
private String rsyncScript;

/**
 * Returns the rsync script currently held by this dataset object.
 *
 * @return the script text, or {@code null} if none has been set
 */
public String getRsyncScript() {
    return this.rsyncScript;
}

/**
* Replaces the rsync script held by this dataset object.
*
* @param rsyncScript the new script text; stored as given, without
* validation
*/
public void setRsyncScript(String rsyncScript) {
this.rsyncScript = rsyncScript;
}

/**
 * The mechanisms by which files can be uploaded.
 *
 * @todo Eventually, rather than hard-coding "RSYNC" et al here, each should
 * be a row in a table.
 */
public enum FileUploadMechanism {

    /**
     * Files are uploaded through the GUI or SWORD.
     *
     * @todo Instead of "STANDARD" should we split out "GUI" and "SWORD" as
     * separate mechanisms? What if we add a non-SWORD API endpoint for
     * uploads ( https://github.com/IQSS/dataverse/issues/1612 ) some day?
     */
    STANDARD,

    /**
     * Files are uploaded via rsync only and upload via any other mechanism
     * is not allowed. This option requires setup of the Data Capture
     * Module.
     */
    RSYNC
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import edu.harvard.iq.dataverse.engine.DataverseEngine;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.authorization.groups.impl.explicit.ExplicitGroupServiceBean;
import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleServiceBean;
import edu.harvard.iq.dataverse.engine.command.Command;
import edu.harvard.iq.dataverse.engine.command.CommandContext;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
Expand Down Expand Up @@ -146,6 +147,9 @@ public class EjbDataverseEngine {
@EJB
MapLayerMetadataServiceBean mapLayerMetadata;

// Service bean for talking to the Data Capture Module, injected via @EJB.
// It is handed to commands through the dataCaptureModule() accessor of the
// command context built in this class.
@EJB
DataCaptureModuleServiceBean dataCaptureModule;

@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;

Expand Down Expand Up @@ -420,6 +424,11 @@ public MapLayerMetadataServiceBean mapLayerMetadata() {
return mapLayerMetadata;
}

/**
* Gives commands access to the Data Capture Module service bean.
*
* @return the {@code dataCaptureModule} reference visible from this scope
* (presumably the {@code @EJB}-injected field of the enclosing engine --
* confirm against the full file)
*/
@Override
public DataCaptureModuleServiceBean dataCaptureModule() {
return dataCaptureModule;
}

};
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package edu.harvard.iq.dataverse.datacapturemodule;

import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import java.util.logging.Logger;
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.JsonNode;
import com.mashape.unirest.http.Unirest;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import static edu.harvard.iq.dataverse.settings.SettingsServiceBean.Key.DataCaptureModuleUrl;
import java.io.Serializable;
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.inject.Named;
import javax.json.JsonObjectBuilder;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;

/**
 * This class contains all the methods that have external runtime dependencies
 * such as the Data Capture Module itself and PostgreSQL.
 */
@Stateless
@Named
public class DataCaptureModuleServiceBean implements Serializable {

    private static final Logger logger = Logger.getLogger(DataCaptureModuleServiceBean.class.getCanonicalName());

    @EJB
    SettingsServiceBean settingsService;

    @PersistenceContext(unitName = "VDCNet-ejbPU")
    private EntityManager em;

    /**
     * Asks the Data Capture Module to create an rsync script by POSTing the
     * given JSON to its "ur.py" (upload request) endpoint.
     *
     * @param user AuthenticatedUser making the request. Currently not used by
     * this method; the caller is expected to put whatever the Data Capture
     * Module needs into {@code jab}.
     * @param dataset Dataset the script is for. Currently not used by this
     * method.
     * @param jab builder for the JSON body; it is built and sent as-is.
     * @return Unirest response as JSON. The status code is not inspected
     * here; that is left to the caller.
     * @throws Exception if Data Capture Module URL hasn't been configured or if
     * the POST failed for any reason.
     */
    public HttpResponse<JsonNode> requestRsyncScriptCreation(AuthenticatedUser user, Dataset dataset, JsonObjectBuilder jab) throws Exception {
        String dcmBaseUrl = getConfiguredBaseUrl("Problem POSTing JSON to Data Capture Module.");
        String jsonString = jab.build().toString();
        logger.fine("JSON to send to Data Capture Module: " + jsonString);
        return Unirest.post(dcmBaseUrl + "/ur.py")
                .body(jsonString)
                .asJson();
    }

    /**
     * Fetches a previously requested rsync script from the "sr.py" (script
     * request) endpoint of the Data Capture Module, using the dataset id as
     * the last path segment.
     *
     * @param user AuthenticatedUser making the request. Currently not used by
     * this method.
     * @param dataset Dataset whose id identifies the script to fetch.
     * @return Unirest response as JSON. The status code is not inspected
     * here; that is left to the caller.
     * @throws Exception if Data Capture Module URL hasn't been configured or if
     * the GET failed for any reason.
     */
    public HttpResponse<JsonNode> retreiveRequestedRsyncScript(AuthenticatedUser user, Dataset dataset) throws Exception {
        String dcmBaseUrl = getConfiguredBaseUrl("Problem GETing JSON to Data Capture Module for dataset " + dataset.getId());
        return Unirest
                .get(dcmBaseUrl + "/sr.py/" + dataset.getId())
                .asJson();
    }

    /**
     * Sets the script on the dataset and merges the dataset into the
     * persistence context.
     *
     * NOTE(review): {@code Dataset.rsyncScript} appears to be a transient
     * field at the moment, in which case the merge would not write the script
     * to the database and the returned instance may not carry it -- confirm
     * before relying on the return value.
     *
     * @param dataset Dataset to attach the script to.
     * @param script the rsync script text.
     * @return the Dataset instance returned by {@code EntityManager.merge}.
     */
    public Dataset persistRsyncScript(Dataset dataset, String script) {
        dataset.setRsyncScript(script);
        return em.merge(dataset);
    }

    /**
     * Looks up the Data Capture Module base URL and normalizes it so that an
     * endpoint path starting with "/" can be appended directly.
     *
     * @param problemDescription start of the exception message describing
     * what the caller was trying to do.
     * @return the configured URL with surrounding whitespace and trailing
     * slashes removed.
     * @throws Exception if the setting is missing or blank.
     */
    private String getConfiguredBaseUrl(String problemDescription) throws Exception {
        String configured = settingsService.getValueForKey(DataCaptureModuleUrl);
        String baseUrl = configured == null ? "" : configured.trim();
        // Avoid "https://host//ur.py" when the setting ends with a slash.
        while (baseUrl.endsWith("/")) {
            baseUrl = baseUrl.substring(0, baseUrl.length() - 1);
        }
        if (baseUrl.isEmpty()) {
            throw new Exception(problemDescription + " The '" + DataCaptureModuleUrl + "' setting has not been configured.");
        }
        return baseUrl;
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import edu.harvard.iq.dataverse.UserNotificationServiceBean;
import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean;
import edu.harvard.iq.dataverse.authorization.groups.impl.explicit.ExplicitGroupServiceBean;
import edu.harvard.iq.dataverse.datacapturemodule.DataCaptureModuleServiceBean;
import edu.harvard.iq.dataverse.engine.DataverseEngine;
import edu.harvard.iq.dataverse.ingest.IngestServiceBean;
import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean;
Expand Down Expand Up @@ -117,4 +118,6 @@ public interface CommandContext {
/**
* @return the service bean for dataset versions available to commands.
*/
public DatasetVersionServiceBean datasetVersion();

/**
* @return the service bean for map layer metadata available to commands.
*/
public MapLayerMetadataServiceBean mapLayerMetadata();

/**
* @return the service bean commands use to talk to the Data Capture Module.
*/
public DataCaptureModuleServiceBean dataCaptureModule();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
package edu.harvard.iq.dataverse.engine.command.impl;

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.authorization.Permission;
import edu.harvard.iq.dataverse.engine.command.CommandContext;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.RequiredPermissions;
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.JsonNode;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.engine.command.AbstractCommand;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.exception.PermissionException;
import edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder;
import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder;
import java.util.Collections;
import java.util.logging.Logger;
import javax.json.Json;
import javax.json.JsonObjectBuilder;

/**
 * Requests an rsync script for a dataset from the Data Capture Module and
 * returns it, together with the dataset id, as JSON.
 *
 * Always catch a RuntimeException when calling this command, which may occur on
 * any problem contacting the Data Capture Module! We have to throw a
 * RuntimeException because otherwise ctxt.engine().submit() will put "OK" for
 * "actiontype" in the actionlogrecord rather than "InternalError" if you throw
 * a CommandExecutionException.
 *
 * @todo Who is responsible for knowing when it's appropriate to create an rsync
 * script for a dataset, Dataverse or the Data Capture Module? For now the DCM
 * will always create an rsync script, which may not be what we want.
 */
@RequiredPermissions(Permission.AddDataset)
public class RequestRsyncScriptCommand extends AbstractCommand<JsonObjectBuilder> {

    private static final Logger logger = Logger.getLogger(RequestRsyncScriptCommand.class.getCanonicalName());

    private final Dataset dataset;
    private final DataverseRequest request;

    /**
     * @param requestArg the request on whose behalf the command runs; its
     * user must be an AuthenticatedUser for {@link #execute} to succeed.
     * @param datasetArg the dataset to get an rsync script for.
     */
    public RequestRsyncScriptCommand(DataverseRequest requestArg, Dataset datasetArg) {
        super(requestArg, datasetArg);
        request = requestArg;
        dataset = datasetArg;
    }

    /**
     * Sends an upload request to the Data Capture Module, then fetches the
     * resulting script, stores it on the dataset and returns it.
     *
     * @param ctxt command context providing the Data Capture Module service.
     * @return a JSON builder holding "datasetId" and "script".
     * @throws PermissionException if the requesting user is not an
     * AuthenticatedUser.
     * @throws RuntimeException on any problem talking to the Data Capture
     * Module (unreachable, non-200 status, empty script, interruption).
     */
    @Override
    public JsonObjectBuilder execute(CommandContext ctxt) throws CommandException {
        // {"dep_email": "bob.smith@example.com", "uid": 42, "depositor_name": ["Smith", "Bob"], "lab_email": "john.doe@example.com", "datacite.resourcetype": "X-Ray Diffraction"}
        User user = request.getUser();
        if (!(user instanceof AuthenticatedUser)) {
            /**
             * @todo get Permission.AddDataset from above somehow rather than
             * duplicating it here.
             */
            throw new PermissionException("This command can only be called by an AuthenticatedUser, not " + user,
                    this, Collections.singleton(Permission.AddDataset), dataset);
        }
        AuthenticatedUser au = (AuthenticatedUser) user;
        HttpResponse<JsonNode> response;
        /**
         * @todo Refactor this building of JSON to make it testable.
         */
        JsonObjectBuilder jab = Json.createObjectBuilder();
        // The general rule should be to always pass the user id and dataset id to the DCM.
        jab.add("userId", au.getId());
        jab.add("datasetId", dataset.getId());
        String errorPreamble = "User id " + au.getId() + " had a problem retrieving rsync script for dataset id " + dataset.getId() + " from Data Capture Module. ";
        try {
            response = ctxt.dataCaptureModule().requestRsyncScriptCreation(au, dataset, jab);
        } catch (Exception ex) {
            throw new RuntimeException(errorPreamble + ex.getLocalizedMessage(), ex);
        }
        /**
         * @todo Since we're creating something, maybe a 201 response would be
         * more appropriate.
         */
        requireOkStatus(response, errorPreamble);
        String message = response.getBody().getObject().getString("status");
        logger.info("Message from Data Caputure Module upload request endpoint: " + message);
        /**
         * @todo Should we persist to the database the fact that we have
         * requested a script? That way we could avoid hitting ur.py (upload
         * request) over and over since it is preferred that we only hit it
         * once.
         */
        /**
         * @todo Don't expect to get the script from ur.py (upload request). Go
         * fetch it from sr.py (script request) after a minute or so. (Cron runs
         * every minute.) Wait 90 seconds to be safe.
         */
        long millisecondsToSleep = 0;
        try {
            Thread.sleep(millisecondsToSleep);
        } catch (InterruptedException ex) {
            // Restore the interrupt flag so code further up the stack can still see it.
            Thread.currentThread().interrupt();
            throw new RuntimeException(errorPreamble + "Unable to wait " + millisecondsToSleep + " milliseconds: " + ex.getLocalizedMessage(), ex);
        }
        try {
            response = ctxt.dataCaptureModule().retreiveRequestedRsyncScript(au, dataset);
        } catch (Exception ex) {
            // Keep the cause so the original stack trace is not lost.
            throw new RuntimeException(errorPreamble + "Problem retrieving rsync script: " + ex.getLocalizedMessage(), ex);
        }
        requireOkStatus(response, errorPreamble);
        /**
         * @todo What happens when no datasetId is in the JSON?
         */
        long datasetId = response.getBody().getObject().getLong("datasetId");
        String script = response.getBody().getObject().getString("script");
        if (script == null || script.isEmpty()) {
            throw new RuntimeException(errorPreamble + "The script was null or empty.");
        }
        logger.fine("script for dataset " + datasetId + ": " + script);
        // Called for its side effect; the returned Dataset is not needed here.
        ctxt.dataCaptureModule().persistRsyncScript(dataset, script);
        NullSafeJsonBuilder nullSafeJsonBuilder = jsonObjectBuilder()
                .add("datasetId", datasetId)
                .add("script", script);
        return nullSafeJsonBuilder;
    }

    /**
     * Throws a RuntimeException unless the response status is exactly 200.
     *
     * @todo is the body too big to fit in the actionlogrecord? The column
     * length on "info" is 1024. See also
     * https://github.com/IQSS/dataverse/issues/2669
     */
    private static void requireOkStatus(HttpResponse<JsonNode> response, String errorPreamble) {
        int statusCode = response.getStatus();
        if (statusCode != 200) {
            throw new RuntimeException(errorPreamble + "Rather than 200 the status code was " + statusCode + ". The body was \'" + response.getBody() + "\'.");
        }
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ public class SettingsServiceBean {
* So there.
*/
public enum Key {
/**
* Base URL of the Data Capture Module, to which endpoint paths such as
* "/ur.py" are appended. For example, https://datacapture.example.org
*/
DataCaptureModuleUrl,
IdentifierGenerationStyle,
OAuth2CallbackUrl,
DefaultAuthProvider,
Expand Down
Loading

0 comments on commit 98fdfae

Please sign in to comment.