Skip to content

Commit

Permalink
Merge pull request #359 from LiUSemWeb/cli-tools
Browse files Browse the repository at this point in the history
Cli tools
  • Loading branch information
hartig authored Sep 27, 2024
2 parents 2d99962 + f20ca89 commit bbab817
Show file tree
Hide file tree
Showing 24 changed files with 749 additions and 251 deletions.
68 changes: 68 additions & 0 deletions bin/common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/sh
# This script resolves HEFQUIN_HOME, locates the Java binary, and sets
# the classpath to point to the correct JAR file.

# Function to resolve symbolic links and print the path of the file that the
# given name finally refers to.
#   $1 - path of a file, possibly a symbolic link
# If $1 is not a symbolic link, it is printed unchanged.
resolveLink() {
  local NAME=$1 # Assign the first argument (the file name) to a local variable
  local DIR TARGET
  local HOPS=0
  # Loop to resolve symbolic links until the actual file is found
  while [ -L "$NAME" ]; do
    # Give up on cyclic links instead of looping forever
    HOPS=$((HOPS + 1))
    if [ "$HOPS" -gt 40 ]; then
      break
    fi
    case "$OSTYPE" in
      # For macOS or BSD systems, 'readlink -f' is not available everywhere,
      # so follow one link at a time: read the link target and, if it is
      # relative, interpret it relative to the (physical) directory of the link
      darwin*|bsd*)
        DIR=$(cd "$(dirname "$NAME")" && pwd -P)
        TARGET=$(readlink "$NAME")
        case "$TARGET" in
          /*) NAME=$TARGET ;;
          *)  NAME=$DIR/$TARGET ;;
        esac
        ;;
      # For Linux and other systems, use readlink to resolve the full path
      *) NAME=$(readlink -f "$NAME") ;;
    esac
  done
  # Output the resolved path
  echo "$NAME"
}

# If HEFQUIN_HOME is not already set, resolve it based on the script's location
if [ -z "$HEFQUIN_HOME" ]; then
  # Resolve the path of the current script, following symbolic links
  SCRIPT=$(resolveLink "$0")
  # Set HEFQUIN_HOME to the parent directory of the script's directory
  HEFQUIN_HOME=$(cd "$(dirname "$SCRIPT")/.." && pwd)
  # Export HEFQUIN_HOME so it can be used in child processes
  export HEFQUIN_HOME
fi

# If JAVA is not set, locate the Java binary
if [ -z "$JAVA" ]; then
  if [ -z "$JAVA_HOME" ]; then
    # If JAVA_HOME is not set, fall back to finding java in the system PATH
    # ('command -v' is the portable replacement for 'which')
    JAVA=$(command -v java)
  else
    # Use JAVA_HOME to find the Java binary
    JAVA="$JAVA_HOME/bin/java"
  fi
fi

# If JAVA is still not set, or does not refer to something that can be
# executed (e.g., because JAVA_HOME is wrong), print an error message and
# exit the script
if [ -z "$JAVA" ] || ! command -v "$JAVA" >/dev/null 2>&1; then
  echo "Cannot find a Java JDK." 1>&2
  echo "Please set JAVA or JAVA_HOME and ensure java (>=Java 17) is in your PATH." 1>&2
  exit 1 # Exit the script with an error code
fi

# Look for the directory that is expected to contain the hefquin-cli JAR file
if [ -d "${HEFQUIN_HOME}/libs/" ]; then
  # If the libs directory exists, use it
  HEFQUIN_JAR_DIR=${HEFQUIN_HOME}/libs/
elif [ -d "${HEFQUIN_HOME}/hefquin-cli/target/" ]; then
  # Otherwise, if hefquin-cli/target/ exists, use that one
  HEFQUIN_JAR_DIR=${HEFQUIN_HOME}/hefquin-cli/target/
else
  # Otherwise, print an error message
  echo "Cannot find the directory ${HEFQUIN_HOME}/libs/ or ${HEFQUIN_HOME}/hefquin-cli/target/" 1>&2
  echo "Did you forget to compile the project?" 1>&2
  exit 2 # Exit the script with an error code
fi

# After determining the directory, look for the hefquin-cli JAR file in that
# directory. The glob is expanded by the for loop (not by an unquoted echo) so
# that paths containing spaces stay intact and so that exactly one JAR is
# selected even if several files match the pattern.
HEFQUIN_CP=
for HEFQUIN_JAR_CANDIDATE in "${HEFQUIN_JAR_DIR}"hefquin-cli-*.jar; do
  case "$HEFQUIN_JAR_CANDIDATE" in
    # Skip auxiliary artifacts that do not contain the runnable classes
    *-sources.jar|*-javadoc.jar|*-tests.jar) continue ;;
  esac
  # If nothing matches, the pattern itself is passed through unexpanded;
  # the -f check filters that case out
  if [ -f "$HEFQUIN_JAR_CANDIDATE" ]; then
    HEFQUIN_CP=$HEFQUIN_JAR_CANDIDATE
    break
  fi
done
unset HEFQUIN_JAR_CANDIDATE

# Check that a JAR file was actually found
if [ -z "$HEFQUIN_CP" ]; then
  echo "Cannot find the HeFQUIN JAR file in ${HEFQUIN_JAR_DIR}" 1>&2
  exit 3 # Exit the script with an error code
fi
6 changes: 6 additions & 0 deletions bin/hefquin
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Launcher for the se.liu.ida.hefquin.cli.RunQueryWithoutSrcSel command-line tool.

# Directory in which this script resides
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# common.sh provides JAVA and HEFQUIN_CP (and exits if they cannot be determined)
source "${SCRIPT_DIR}/common.sh"

# Run the Java command.
# JVM_ARGS is deliberately left unquoted so that it can carry several options;
# "$@" and the classpath are quoted so that arguments and paths containing
# spaces or glob characters (such as '*') are passed on unchanged.
"$JAVA" $JVM_ARGS -cp "$HEFQUIN_CP" se.liu.ida.hefquin.cli.RunQueryWithoutSrcSel "$@"
6 changes: 6 additions & 0 deletions bin/hefquin-pg
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Launcher for the se.liu.ida.hefquin.cli.RunBGPOverNeo4j command-line tool.

# Directory in which this script resides
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# common.sh provides JAVA and HEFQUIN_CP (and exits if they cannot be determined)
source "${SCRIPT_DIR}/common.sh"

# Run the Java command.
# JVM_ARGS is deliberately left unquoted so that it can carry several options;
# "$@" and the classpath are quoted so that arguments and paths containing
# spaces or glob characters (such as '*') are passed on unchanged.
"$JAVA" $JVM_ARGS -cp "$HEFQUIN_CP" se.liu.ida.hefquin.cli.RunBGPOverNeo4j "$@"
6 changes: 6 additions & 0 deletions bin/hefquin-pgmat
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Launcher for the se.liu.ida.hefquin.cli.MaterializeRDFViewOfLPG command-line tool.

# Directory in which this script resides
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# common.sh provides JAVA and HEFQUIN_CP (and exits if they cannot be determined)
source "${SCRIPT_DIR}/common.sh"

# Run the Java command.
# JVM_ARGS is deliberately left unquoted so that it can carry several options;
# "$@" and the classpath are quoted so that arguments and paths containing
# spaces or glob characters (such as '*') are passed on unchanged.
"$JAVA" $JVM_ARGS -cp "$HEFQUIN_CP" se.liu.ida.hefquin.cli.MaterializeRDFViewOfLPG "$@"
6 changes: 6 additions & 0 deletions bin/hefquin-server
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Launcher for the se.liu.ida.hefquin.cli.RunHeFQUINServer command-line tool.

# Directory in which this script resides
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
# common.sh provides JAVA and HEFQUIN_CP (and exits if they cannot be determined)
source "${SCRIPT_DIR}/common.sh"

# Run the Java command.
# JVM_ARGS is deliberately left unquoted so that it can carry several options;
# "$@" and the classpath are quoted so that arguments and paths containing
# spaces or glob characters (such as '*') are passed on unchanged.
"$JAVA" $JVM_ARGS -cp "$HEFQUIN_CP" se.liu.ida.hefquin.cli.RunHeFQUINServer "$@"
6 changes: 6 additions & 0 deletions hefquin-cli/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
<artifactId>hefquin-engine</artifactId>
<version>0.0.4-SNAPSHOT</version>
</dependency>
<!-- hefquin-service -->
<dependency>
<groupId>se.liu.ida.hefquin</groupId>
<artifactId>hefquin-service</artifactId>
<version>0.0.4-SNAPSHOT</version>
</dependency>
</dependencies>

<build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import arq.cmdline.ModTime;

import org.apache.jena.atlas.RuntimeIOException;
import org.apache.jena.cmd.ArgDecl;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.TypeMapper;
import org.apache.jena.graph.Graph;
Expand All @@ -20,6 +19,7 @@
import org.apache.jena.sparql.core.Quad;
import org.apache.jena.sparql.graph.GraphFactory;

import se.liu.ida.hefquin.cli.modules.ModNeo4jEndpoint;
import se.liu.ida.hefquin.cli.modules.ModLPG2RDFConfiguration;
import se.liu.ida.hefquin.engine.wrappers.lpg.conf.LPG2RDFConfiguration;
import se.liu.ida.hefquin.engine.wrappers.lpg.conn.Neo4jConnectionFactory;
Expand Down Expand Up @@ -47,44 +47,77 @@
import java.util.List;
import java.util.zip.GZIPOutputStream;

/**
* A command-line tool to materialize an RDF view of a labeled property graph
* (LPG) retrieved from a Neo4j endpoint using Cypher queries. This tool takes
* various configuration options and outputs RDF data that conforms to a given
* LPG-to-RDF configuration.
*/
public class MaterializeRDFViewOfLPG extends CmdARQ
{
protected final ModNeo4jEndpoint modEndpoint = new ModNeo4jEndpoint();
protected final ModTime modTime = new ModTime();
protected final ModLangOutput modLangOut = new ModLangOutput();
protected final ModLPG2RDFConfiguration modLPG2RDFConfiguration = new ModLPG2RDFConfiguration();

protected final ArgDecl argEndpointURI = new ArgDecl(ArgDecl.HasValue, "endpoint");


/**
* Main entry point of the tool, accepting command-line arguments to specify the
* Neo4j connection details and output format options.
*
* @param args Command-line arguments.
*/
public static void main( final String[] args ) {
new MaterializeRDFViewOfLPG(args).mainRun();
new MaterializeRDFViewOfLPG( args ).mainRun();
}

/**
* Constructor that initializes the command-line tool with necessary argument
* modules for endpoint configuration, output format, and timing options.
*
* @param argv Command-line arguments.
*/
protected MaterializeRDFViewOfLPG( final String[] argv ) {
super(argv);

addModule(modTime);
addModule(modLangOut);

addModule(modLPG2RDFConfiguration);

add(argEndpointURI, "--endpoint", "The URI of the Neo4j endpoint");
addModule(modEndpoint);
}

/**
* Returns the usage summary string of the command, showing the required and
* optional arguments.
*
* @return A string that describes the usage of the command.
*/
@Override
protected String getSummary() {
return getCommandName() + "--endpoint=<Neo4j endpoint URI> --time?";
return "Usage: " + getCommandName() + " " +
"--endpoint=<neo4j-endpoint-url> " +
"--username=<neo4j-username> " +
"--password=<neo4j-password>";
}

/**
* Returns the command name used to invoke the tool.
*
* @return The name of the command.
*/
@Override
protected void exec() {
if ( ! hasArg(argEndpointURI) ) {
System.err.println( "Error: URI of Neo4j endpoint not specified.");
System.err.println( " Specify it using the --" + argEndpointURI.getKeyName() + " argument.");
return;
}
protected String getCommandName() {
return "hefquin-pgmat";
}

final String neo4jEndpointURI = getArg(argEndpointURI).getValue();
/**
* Retrieves nodes and edges from a Neo4j database, converts them to RDF
* triples, and writes the triples to the output stream System.out.
*/
@Override
protected void exec() {
final String endpoint = modEndpoint.getEndpoint();
final String username = modEndpoint.getUsername();
final String password = modEndpoint.getPassword();

final LPG2RDFConfiguration l2rConf = modLPG2RDFConfiguration.getLPG2RDFConfiguration();

Expand All @@ -100,10 +133,16 @@ protected void exec() {
modTime.startTimer();
}

final List<TableRecord> nodesResponse = execQuery(getNodesQuery, neo4jEndpointURI);
final List<TableRecord> nodesResponse = execQuery( getNodesQuery,
endpoint,
username,
password );
writeTriplesForNodes(nodesResponse, l2rConf, rdfOutStream);

final List<TableRecord> edgesResponse = execQuery(getEdgesQuery, neo4jEndpointURI);
final List<TableRecord> edgesResponse = execQuery( getEdgesQuery,
endpoint,
username,
password );
writeTriplesForEdges(edgesResponse, l2rConf, rdfOutStream);

rdfOutStream.finish();
Expand All @@ -114,6 +153,12 @@ protected void exec() {
}
}

/**
* Builds and returns the Cypher query used to retrieve nodes from the Neo4j
* database.
*
* @return A CypherQuery for retrieving nodes.
*/
public CypherQuery buildGetNodesQuery() {
// MATCH (n)
// RETURN n AS node, HEAD(LABELS(n)) AS label
Expand All @@ -130,6 +175,12 @@ public CypherQuery buildGetNodesQuery() {
.build();
}

/**
* Builds and returns the Cypher query used to retrieve edges from the Neo4j
* database.
*
* @return A CypherQuery for retrieving edges.
*/
public CypherQuery buildGetEdgesQuery() {
// MATCH (n1)-[e]->(n2)
// RETURN ID(n1) AS nid1, ID(n2) AS nid2, e AS edge, TYPE(e) AS reltype
Expand All @@ -152,10 +203,21 @@ public CypherQuery buildGetEdgesQuery() {
.build();
}

/**
* Executes the given Cypher query against the Neo4j database using the provided
* connection details.
*
* @param query The Cypher query to be executed.
* @param endpoint The URI of the Neo4j endpoint.
* @param username The username for the Neo4j endpoint.
* @param password The password for the Neo4j endpoint.
* @return A list of table records containing the results of the query.
*/
protected List<TableRecord> execQuery( final CypherQuery query,
final String neo4jEndpointURI ) {
final Neo4jConnection conn = Neo4jConnectionFactory.connect(neo4jEndpointURI);

final String endpoint,
final String username,
final String password ) {
final Neo4jConnection conn = Neo4jConnectionFactory.connect( endpoint, username, password );
final List<TableRecord> result;
try {
result = conn.execute(query);
Expand All @@ -172,6 +234,14 @@ protected List<TableRecord> execQuery( final CypherQuery query,
return result;
}

/**
* Writes RDF triples representing the nodes retrieved from the Neo4j database
* to the provided stream.
*
* @param nodesResponse The list of table records containing the nodes.
* @param l2rConf The LPG-to-RDF configuration used for conversion.
* @param rdfOutStream The output stream to write the RDF triples to.
*/
protected void writeTriplesForNodes( final List<TableRecord> nodesResponse,
final LPG2RDFConfiguration l2rConf,
final StreamRDF rdfOutStream ) {
Expand Down Expand Up @@ -202,6 +272,14 @@ protected void writeTriplesForNodes( final List<TableRecord> nodesResponse,
}
}

/**
* Writes RDF triples representing the edges retrieved from the Neo4j database
* to the provided stream.
*
* @param edgesResponse The list of table records containing the edges.
* @param l2rConf The LPG-to-RDF configuration used for conversion.
* @param rdfOutStream The output stream to write the RDF triples to.
*/
protected void writeTriplesForEdges( final List<TableRecord> edgesResponse,
final LPG2RDFConfiguration l2rConf,
final StreamRDF rdfOutStream ) {
Expand Down Expand Up @@ -271,6 +349,15 @@ protected void writeTriplesForProperties( final Node subject,
}
}

/**
* Sets up the output stream for writing RDF data. If the output should be
* compressed, a GZIP stream is created. The RDF format for output is determined
* by the configuration.
*
* @param outStreamBase The base output stream (e.g., System.out).
* @return The StreamRDF configured for the appropriate RDF format and
* compression.
*/
protected StreamRDF setupOutputStream( final OutputStream outStreamBase ) {
final OutputStream outStream;
if ( modLangOut.compressedOutput() ) {
Expand Down
Loading

0 comments on commit bbab817

Please sign in to comment.