Skip to content

Commit

Permalink
Merge branch 'dev' into stable
Browse files Browse the repository at this point in the history
  • Loading branch information
haideriqbal committed Sep 10, 2024
2 parents fa1cc4b + bead62e commit 1b74642
Show file tree
Hide file tree
Showing 2,855 changed files with 39,345 additions and 27,787 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import com.google.gson.JsonObject;

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class JsonHelper {
Expand Down Expand Up @@ -41,11 +42,48 @@ public static String objectToString(JsonElement value) {

return elements.get(0);

} else if(value.isJsonObject()) {
} else if(value.isJsonObject() && value.getAsJsonObject().get("value") != null) {
return objectToString(value.getAsJsonObject().get("value"));

/* This is a special case for the OLS API: the value may be a JsonObject whose entries are
themselves nested JsonObjects, as in this example from the OIO ontology:
{
"http://www.geneontology.org/formats/oboInOwl#hasURI": {
"type": [
"literal"
],
"datatype": "http://www.w3.org/2001/XMLSchema#anyURI",
"value": "http://www.obofoundry.org/wiki/index.php/Definitions"
},
"http://www.w3.org/1999/02/22-rdf-syntax-ns#type": "http://www.geneontology.org/formats/oboInOwl#DbXref",
"http://www.w3.org/2000/01/rdf-schema#label": {
"type": [
"literal"
],
"value": "URL:http://www.obofoundry.org/wiki/index.php/Definitions"
},
"isObsolete": false
}
* For this type of JsonObject we need to iterate through the entries and find the "value" key.
* For the sake of simplicity we return the first value found, as we don't have any
* mechanism to judge which value to prefer over the others.
*/

} else if (value.isJsonObject()) {
for (Map.Entry<String, JsonElement> entry : value.getAsJsonObject().entrySet()) {
JsonElement element = entry.getValue();
if (element.isJsonObject()) {
JsonObject obj = element.getAsJsonObject();
if (obj.has("value")) {
return obj.get("value").getAsString();
}
}
}
} else {
return value.getAsString();
}
return value.getAsString();
}

public static List<JsonElement> getValues(JsonObject json, String predicate) {
Expand Down
2 changes: 2 additions & 0 deletions compare_testcase_output_mac.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/usr/bin/env bash
# Compare the generated test-case output tree against the expected output tree
# and store the summary in testcases_compare_result.log.
#   -r  recurse into subdirectories
#   -q  report only whether files differ
#   -a  treat all files as text
#   -B  ignore changes that only insert or delete blank lines
#   -w  ignore all whitespace
diff -r -q -a -B -w \
    --strip-trailing-cr \
    --exclude=.gitkeep \
    testcases_output/testcases testcases_expected_output/ \
    > testcases_compare_result.log
33 changes: 33 additions & 0 deletions dataload/configs/idocovid19.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"ontologies": [
{
"id": "idocovid19",
"preferredPrefix": "IDO-COVID-19",
"title": "The COVID-19 Infectious Disease Ontology",
"uri": "https://raw.githubusercontent.com/infectious-disease-ontology-extensions/ido-covid-19/master/ontology/ido%20covid-19",
"description": "The COVID-19 Infectious Disease Ontology (IDO-COVID-19) is an extension of the Infectious Disease Ontology (IDO) and the Virus Infectious Disease Ontology (VIDO). IDO-COVID-19 follows OBO Foundry guidelines, employs the Basic Formal Ontology as its starting point, and covers epidemiology, classification, pathogenesis, and treatment of terms used to represent infection by the SARS-CoV-2 virus strain, and the associated COVID-19 disease.",
"homepage": "https://github.com/infectious-disease-ontology-extensions/ido-covid-19",
"mailing_list": "johnbeverley2021@u.northwestern.edu",
"definition_property": [
"http://purl.obolibrary.org/obo/IAO_0000115"
],
"synonym_property": [
"http://www.geneontology.org/formats/oboInOwl#hasExactSynonym"
],
"hierarchical_property": [
"http://purl.obolibrary.org/obo/BFO_0000050"
],
"base_uri": [
"http://purl.obolibrary.org/obo/IDO-COVID-19"
],
"oboSlims": false,
"reasoner": "OWL2",
"ontology_purl": "https://gist.githubusercontent.com/haideriqbal/9b5245af626bd7687831c19c2c8076e8/raw/2c75495f31df0a379062bf12d3fab323eedbb7a9/idocovid19.owl"
},
{
"id": "oio",
"ontology_purl": "https://gist.githubusercontent.com/haideriqbal/4a2b1a9aa81d9fa26cae81e0b0b7730b/raw/527665128b9be9d7d6133f9a796379600151c737/oboInOwl.owl",
"base_uri": ["http://www.geneontology.org/formats/oboInOwl#"]
}
]
}
53 changes: 40 additions & 13 deletions dataload/linker/src/main/java/LinkerPass1.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
import com.google.common.collect.LinkedHashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.SetMultimap;
import com.google.common.io.CountingInputStream;
import com.google.gson.Gson;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonParser;
import com.google.gson.*;
import com.google.gson.stream.JsonReader;
import com.google.gson.stream.JsonToken;

Expand Down Expand Up @@ -168,15 +163,31 @@ public static LinkerPass1Result run(String inputJsonFilename) throws IOException
for(var entry : result.iriToDefinitions.entrySet()) {

EntityDefinitionSet definitions = entry.getValue();

// definingOntologyIris -> definingOntologyIds
for(String ontologyIri : definitions.definingOntologyIris) {
for(String ontologyId : result.ontologyIriToOntologyIds.get(ontologyIri)) {
definitions.definingOntologyIds.add(ontologyId);
if (result.ontologyIriToOntologyIds.containsKey(ontologyIri)) {
for(String ontologyId : result.ontologyIriToOntologyIds.get(ontologyIri)) {
definitions.definingOntologyIds.add(ontologyId);
}
}
}

for(EntityDefinition def : definitions.definitions) {
if(def.curie != null && entry.getValue().definingOntologyIds.iterator().hasNext()) {
JsonObject curieObject = def.curie.getAsJsonObject();
if(curieObject.has("value")) {
String curieValue = curieObject.get("value").getAsString();
if(!curieValue.contains(":")) {
var definingOntologyId = entry.getValue().definingOntologyIds.iterator().next();
EntityDefinition definingEntity = entry.getValue().ontologyIdToDefinitions.get(definingOntologyId);
if (definingEntity != null && definingEntity.curie != null) {
curieValue = definingEntity.curie.getAsJsonObject().get("value").getAsString();
curieObject.addProperty("value", curieValue);
result.iriToDefinitions.put(entry.getKey(), definitions);
}
}
}
}
if(definitions.definingOntologyIds.contains(def.ontologyId)) {
def.isDefiningOntology = true;
}
Expand Down Expand Up @@ -235,14 +246,30 @@ public static void parseEntity(JsonReader jsonReader, String entityType, String
curie = jsonParser.parse(jsonReader);
} else if(key.equals("type")) {
types = gson.fromJson(jsonReader, Set.class);
} else if(key.equals("http://www.w3.org/2000/01/rdf-schema#definedBy")) {
} else if(key.equals("http://www.w3.org/2000/01/rdf-schema#isDefinedBy")) {
JsonElement jsonDefinedBy = jsonParser.parse(jsonReader);
if(jsonDefinedBy.isJsonArray()) {
JsonArray arr = jsonDefinedBy.getAsJsonArray();
for(JsonElement el : arr) {
definedBy.add( el.getAsString() );
for(JsonElement isDefinedBy : arr) {
if (isDefinedBy.isJsonObject()) {
JsonObject obj = isDefinedBy.getAsJsonObject();
var value = obj.get("value");
if (value.isJsonObject()) {
definedBy.add(value.getAsJsonObject().get("value").getAsString());
} else
definedBy.add(value.getAsString());
} else
definedBy.add( isDefinedBy.getAsString() );
}
} else {
} else if (jsonDefinedBy.isJsonObject()) {
JsonObject obj = jsonDefinedBy.getAsJsonObject();
var value = obj.get("value");
if (value.isJsonObject()) {
definedBy.add(value.getAsJsonObject().get("value").getAsString());
} else
definedBy.add(value.getAsString());
}
else {
definedBy.add(jsonDefinedBy.getAsString());
}
} else {
Expand Down
88 changes: 87 additions & 1 deletion dataload/linker/src/main/java/LinkerPass2.java
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import com.google.gson.stream.JsonReader;
import com.google.gson.stream.JsonToken;
Expand Down Expand Up @@ -151,6 +153,10 @@ private static void writeEntityArray(JsonReader jsonReader, JsonWriter jsonWrite
if(name.equals("iri")) {
entityIri = jsonReader.nextString();
jsonWriter.value(entityIri);
} else if (name.equalsIgnoreCase("curie")) {
processCurieObject(jsonReader, jsonWriter, pass1Result, entityIri);
} else if (name.equalsIgnoreCase("shortForm")) {
processShortFormObject(jsonReader, jsonWriter, pass1Result, entityIri);
} else {
CopyJsonGatheringStrings.copyJsonGatheringStrings(jsonReader, jsonWriter, stringsInEntity);
}
Expand Down Expand Up @@ -320,7 +326,7 @@ private static void writeLinkedEntitiesFromGatheredStrings(JsonWriter jsonWriter
private static void writeIriMapping(JsonWriter jsonWriter, EntityDefinitionSet definitions, String ontologyId) throws IOException {

if(definitions.definingDefinitions.size() > 0) {

// There are ontologies which canonically define this term

jsonWriter.name("definedBy");
Expand Down Expand Up @@ -436,4 +442,84 @@ private static class CurieMapResult {
public String source;
}

/**
 * Consumes a {@code shortForm} literal object from {@code jsonReader} and writes a rewritten
 * copy to {@code jsonWriter}.
 *
 * <p>The output object contains the {@code type} array copied verbatim (when present) followed
 * by {@code value}, which is replaced by the result of {@code getProcessedCurieValue} for
 * {@code entityIri} with every {@code ':'} turned into {@code '_'}. If that result is empty,
 * the value read from the input is kept so an existing short form is never blanked out.
 *
 * <p>Fields other than {@code type} and {@code value} are skipped and not written; the output
 * has only ever carried those two fields.
 *
 * @param jsonReader  reader positioned at the start of the short form object
 * @param jsonWriter  writer positioned where the short form value is expected
 * @param pass1Result pass 1 result used to look up the replacement CURIE
 * @param entityIri   IRI of the entity currently being written
 * @throws IOException if reading from or writing to the underlying streams fails
 */
private static void processShortFormObject(JsonReader jsonReader, JsonWriter jsonWriter, LinkerPass1.LinkerPass1Result pass1Result, String entityIri) throws IOException {
    JsonArray typeArray = null;
    String shortFormValue = null;

    jsonReader.beginObject();
    while (jsonReader.hasNext()) {
        String shortFormFieldName = jsonReader.nextName();
        if (shortFormFieldName.equals("type")) {
            typeArray = new JsonArray();
            jsonReader.beginArray();
            while (jsonReader.hasNext()) {
                typeArray.add(jsonReader.nextString());
            }
            jsonReader.endArray();
        } else if (shortFormFieldName.equals("value")) {
            // The token must be consumed even though it is normally replaced.
            String originalValue = jsonReader.nextString();
            String processedValue = getProcessedCurieValue(pass1Result, entityIri).replace(":", "_");
            shortFormValue = processedValue.isEmpty() ? originalValue : processedValue;
        } else {
            // Without consuming the value, the next nextName() call would throw.
            jsonReader.skipValue();
        }
    }
    jsonReader.endObject();

    // Write the modified short form object, always "type" first and then "value".
    jsonWriter.beginObject();
    if (typeArray != null) {
        jsonWriter.name("type");
        jsonWriter.beginArray();
        for (JsonElement typeElement : typeArray) {
            jsonWriter.value(typeElement.getAsString());
        }
        jsonWriter.endArray();
    }
    if (shortFormValue != null) {
        jsonWriter.name("value").value(shortFormValue);
    }
    jsonWriter.endObject();
}

/**
 * Consumes a {@code curie} literal object from {@code jsonReader} and writes a rewritten copy
 * to {@code jsonWriter}.
 *
 * <p>The output object contains the {@code type} array copied verbatim (when present) followed
 * by {@code value}, which is replaced by the result of {@code getProcessedCurieValue} for
 * {@code entityIri}. If that result is empty, the value read from the input is kept so an
 * existing CURIE is never blanked out.
 *
 * <p>Fields other than {@code type} and {@code value} are skipped and not written; the output
 * has only ever carried those two fields.
 *
 * @param jsonReader  reader positioned at the start of the curie object
 * @param jsonWriter  writer positioned where the curie value is expected
 * @param pass1Result pass 1 result used to look up the replacement CURIE
 * @param entityIri   IRI of the entity currently being written
 * @throws IOException if reading from or writing to the underlying streams fails
 */
private static void processCurieObject(JsonReader jsonReader, JsonWriter jsonWriter, LinkerPass1.LinkerPass1Result pass1Result, String entityIri) throws IOException {
    JsonArray typeArray = null;
    String curieValue = null;

    jsonReader.beginObject();
    while (jsonReader.hasNext()) {
        String curieFieldName = jsonReader.nextName();
        if (curieFieldName.equals("type")) {
            typeArray = new JsonArray();
            jsonReader.beginArray();
            while (jsonReader.hasNext()) {
                typeArray.add(jsonReader.nextString());
            }
            jsonReader.endArray();
        } else if (curieFieldName.equals("value")) {
            // The token must be consumed even though it is normally replaced.
            String originalValue = jsonReader.nextString();
            String processedValue = getProcessedCurieValue(pass1Result, entityIri);
            curieValue = processedValue.isEmpty() ? originalValue : processedValue;
        } else {
            // Without consuming the value, the next nextName() call would throw.
            jsonReader.skipValue();
        }
    }
    jsonReader.endObject();

    // Write the modified curie object, always "type" first and then "value".
    jsonWriter.beginObject();
    if (typeArray != null) {
        jsonWriter.name("type");
        jsonWriter.beginArray();
        for (JsonElement typeElement : typeArray) {
            jsonWriter.value(typeElement.getAsString());
        }
        jsonWriter.endArray();
    }
    if (curieValue != null) {
        jsonWriter.name("value").value(curieValue);
    }
    jsonWriter.endObject();
}

/**
 * Looks up the CURIE string recorded in pass 1 for the given entity IRI.
 *
 * <p>The value is taken from the {@code "value"} member of the {@code curie} object of the
 * first definition in the entity's definition set.
 *
 * @param pass1Result pass 1 result holding {@code iriToDefinitions}
 * @param entityIri   IRI of the entity to look up; may be {@code null}
 * @return the CURIE string, or an empty string when the IRI is {@code null} or unknown, the
 *         definition set is empty, or the first definition has no curie object with a
 *         {@code "value"} member
 */
private static String getProcessedCurieValue(LinkerPass1.LinkerPass1Result pass1Result, String entityIri) {
    // NOTE(review): the IRI is presumably null when "curie" precedes "iri" in the input; confirm against the caller.
    if (entityIri == null) {
        return "";
    }

    var definitionSet = pass1Result.iriToDefinitions.get(entityIri);
    if (definitionSet == null) {
        return "";
    }

    var definitionIterator = definitionSet.definitions.iterator();
    if (!definitionIterator.hasNext()) {
        return "";
    }

    // A definition may carry no curie at all, or one that is not an object.
    var curie = definitionIterator.next().curie;
    if (curie == null || !curie.isJsonObject()) {
        return "";
    }

    JsonObject defCurieObject = curie.getAsJsonObject();
    if (defCurieObject.has("value")) {
        return defCurieObject.get("value").getAsString();
    }
    return "";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ public static void annotateShortForms(OntologyGraph graph) {
if(c.uri == null)
continue;

if (preferredPrefix == null || preferredPrefix.isEmpty()) {
preferredPrefix = graph.config.get("id").toString().toUpperCase();
}

String shortForm = extractShortForm(graph, ontologyBaseUris, preferredPrefix, c.uri);
String curie = shortForm.replaceFirst("_", ":");
Expand Down
69 changes: 69 additions & 0 deletions dev-testing/teststack-mac.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/env bash

# Both positional arguments are required. With only one, out_dir would be empty
# and later paths built from it would resolve under "/".
if [ $# -lt 2 ]; then
    echo "Usage: $0 <rel_json_config_url> <rel_output_dir>"
    echo "If <rel_json_config_url> is a file it will read and load this single configuration."
    echo "If <rel_json_config_url> as a directory, it will read and load all json configuration in the directory and
subdirectories."
    exit 1
fi

config_url=$1
out_dir=$2

# Create or clean output directory
if [ -d "$out_dir" ]; then
    echo "$out_dir already exists and will now be cleaned."
    # Quoted so paths with spaces work; ":?" aborts rather than expanding to "/*".
    rm -Rf -- "${out_dir:?}"/*
else
    echo "$out_dir does not exist and will now be created."
    # -p also creates missing parent directories.
    mkdir -p -- "$out_dir"
fi

# process_config <config_path> <out_dir>
#
# If <config_path> is a directory: creates <out_dir>/<directory name> and recurses
# into every *.json file and every subdirectory it contains.
# If <config_path> is a file: creates <out_dir>/<file name without .json>, then
# runs create_datafiles.sh (with --noDates) and load_test_into_solr.sh from
# $OLS4_HOME against the absolute path of that directory.
# Otherwise prints a "does not exist" message.
function process_config {
    echo "process_config param1=$1"
    echo "process_config param2=$2"

    local config_url=$1
    local out_dir=$2
    # Declared local so recursive calls do not leak loop state into the caller.
    local basename filename dir

    if [ -d "$config_url" ]; then
        echo "$config_url is a directory. Processing config files in $config_url"
        basename=$(basename "$config_url")
        echo "basename for config_url=$basename"
        local out_dir_basename=$out_dir/$basename
        mkdir -p "$out_dir_basename"
        for filename in "$config_url"/*.json; do
            # An unmatched glob stays literal; skip it instead of recursing on it.
            [ -e "$filename" ] || continue
            echo "filename=$filename"
            process_config "$filename" "$out_dir_basename"
        done
        for dir in "$config_url"/*/; do
            [ -d "$dir" ] || continue
            process_config "$dir" "$out_dir_basename"
        done
    elif [ -f "$config_url" ]; then
        echo "$config_url is a file. Processing single config file."
        basename=$(basename "$config_url" .json)

        local relative_out_dir=$out_dir/$basename
        mkdir -p "$relative_out_dir"

        local absolute_out_dir
        absolute_out_dir=$(realpath -q "$relative_out_dir")
        echo "absolute_out_dir=$absolute_out_dir"

        "$OLS4_HOME"/dataload/create_datafiles.sh "$config_url" "$absolute_out_dir" --noDates

        "$OLS4_HOME"/dev-testing/load_test_into_solr.sh "$absolute_out_dir"
    else
        echo "$config_url does not exist."
    fi
}

# Run the clean-up scripts and start Solr before processing configs, because
# process_config calls load_test_into_solr.sh. Expansions are quoted so paths
# containing spaces or glob characters are passed through intact.
"$OLS4_HOME"/dev-testing/clean-neo4j.sh
"$OLS4_HOME"/dev-testing/clean-solr.sh
"$OLS4_HOME"/dev-testing/start-solr.sh

process_config "$config_url" "$out_dir"

# The Neo4j load script runs on the complete output directory before Neo4j is started.
"$OLS4_HOME"/dev-testing/load_test_into_neo4j.sh "$out_dir"
"$OLS4_HOME"/dev-testing/start-neo4j.sh
2 changes: 1 addition & 1 deletion ebi_ontologies.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"ontologies": [
{
"id": "upheno",
"ontology_purl": "https://github.com/obophenotype/upheno-dev/releases/download/v2024-09-03/upheno.owl"
"ontology_purl": "https://github.com/obophenotype/upheno-dev/releases/latest/download/upheno.owl"
},
{
"id": "hra",
Expand Down
5 changes: 5 additions & 0 deletions testcases/annotation-properties/gitIssue502.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
"id": "gitIssue502",
"preferredPrefix": "gitIssue502",
"ontology_purl": "./testcases/annotation-properties/gitIssue502.owl"
},
{
"id": "oio",
"ontology_purl": "https://gist.githubusercontent.com/haideriqbal/4a2b1a9aa81d9fa26cae81e0b0b7730b/raw/527665128b9be9d7d6133f9a796379600151c737/oboInOwl.owl",
"base_uri": ["http://www.geneontology.org/formats/oboInOwl#"]
}
]
}
Loading

0 comments on commit 1b74642

Please sign in to comment.