Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DRILL shifts the quality computation to triplestore #505

Merged
merged 3 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,16 +99,16 @@ weighted avg 1.00 1.00 1.00 4

## Learning OWL Class Expression over DBpedia
```python
from ontolearn.learners import TDL
from ontolearn.learners import TDL, Drill
from ontolearn.triple_store import TripleStore
from ontolearn.learning_problem import PosNegLPStandard
from owlapy.owl_individual import OWLNamedIndividual
from owlapy import owl_expression_to_sparql, owl_expression_to_dl
from ontolearn.utils.static_funcs import save_owl_class_expressions
# (1) Initialize Triplestore
kb = TripleStore(url="http://dice-dbpedia.cs.upb.de:9080/sparql")
kb = TripleStore(url="https://dbpedia.data.dice-research.org/sparql")
# (3) Initialize a learner.
model = TDL(knowledge_base=kb)
model = Drill(knowledge_base=kb) # or TDL(knowledge_base=kb)
# (4) Define a description logic concept learning problem.
lp = PosNegLPStandard(pos={OWLNamedIndividual("http://dbpedia.org/resource/Angela_Merkel")},
neg={OWLNamedIndividual("http://dbpedia.org/resource/Barack_Obama")})
Expand All @@ -117,7 +117,7 @@ h = model.fit(learning_problem=lp).best_hypotheses()
print(h)
print(owl_expression_to_dl(h))
print(owl_expression_to_sparql(expression=h))
save_owl_class_expressions(expressions=h,path="owl_prediction")
save_owl_class_expressions(expressions=h,path="#owl_prediction")
```

For more, please refer to the [examples](https://github.com/dice-group/Ontolearn/tree/develop/examples) folder.
Expand Down
31 changes: 24 additions & 7 deletions ontolearn/learners/drill.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
# -----------------------------------------------------------------------------

import pandas as pd
import json
from owlapy.class_expression import OWLClassExpression
Expand All @@ -42,11 +41,14 @@
import time
import os
# F1 class will be deprecated to become compute_f1_score function.
from ontolearn.utils.static_funcs import compute_f1_score
from ontolearn.utils.static_funcs import compute_f1_score, compute_f1_score_from_confusion_matrix
import random
from ontolearn.heuristics import CeloeBasedReward
from ontolearn.data_struct import PrepareBatchOfPrediction
from tqdm import tqdm
from owlapy.converter import owl_expression_to_sparql_with_confusion_matrix

from ..triple_store import TripleStore
from ..utils.static_funcs import make_iterable_verbose
from owlapy.utils import get_expression_length

Expand Down Expand Up @@ -162,7 +164,11 @@ def __init__(self, knowledge_base,
max_num_of_concepts_tested=max_num_of_concepts_tested,
max_runtime=max_runtime)
# CD: Setting this variable here will be removed later.
self.quality_func = compute_f1_score

if isinstance(self.kb, TripleStore):
self.quality_func = compute_f1_score_from_confusion_matrix
else:
self.quality_func = compute_f1_score

def initialize_training_class_expression_learning_problem(self,
pos: FrozenSet[OWLNamedIndividual],
Expand Down Expand Up @@ -301,9 +307,9 @@ def fit(self, learning_problem: PosNegLPStandard, max_runtime=None):
if max_runtime:
assert isinstance(max_runtime, float) or isinstance(max_runtime, int)
self.max_runtime = max_runtime

# (1) Reinitialize few attributes to ensure a clean start.
self.clean()
# (1) Initialize the start time
# (2) Initialize the start time
self.start_time = time.time()
# (2) Two mappings from a unique OWL Concept to integer, where a unique concept represents the type info
# C(x) s.t. x \in E^+ and C(y) s.t. y \in E^-.
Expand Down Expand Up @@ -429,9 +435,20 @@ def compute_quality_of_class_expression(self, state: RL_State) -> None:
# (3) Increment the number of tested concepts attribute.

"""
if isinstance(self.kb,TripleStore):
sparql_query=owl_expression_to_sparql_with_confusion_matrix(expression=state.concept,
positive_examples=self.pos,
negative_examples=self.neg)
bindings=self.kb.query_results(sparql_query).json()["results"]["bindings"]
assert len(bindings) == 1
bindings=bindings.pop()
confusion_matrix={k : v["value"]for k,v in bindings.items()}
quality = self.quality_func(confusion_matrix=confusion_matrix)


individuals = frozenset([i for i in self.kb.individuals(state.concept)])
quality = self.quality_func(individuals=individuals, pos=self.pos, neg=self.neg)
else:
individuals = frozenset([i for i in self.kb.individuals(state.concept)])
quality = self.quality_func(individuals=individuals, pos=self.pos, neg=self.neg)
state.quality = quality
self._number_of_tested_concepts += 1

Expand Down
8 changes: 6 additions & 2 deletions ontolearn/triple_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,9 +985,10 @@ class TripleStore:
url: str
def __init__(self, reasoner=None, url: str = None):

self.url=url
if reasoner is None:
assert url is not None, f"Reasoner:{reasoner} and url of a triplestore {url} cannot be both None."
self.g = TripleStoreReasonerOntology(url=url)
assert url is not None, f"Reasoner:{reasoner} and url of a triplestore {self.url} cannot be both None."
self.g = TripleStoreReasonerOntology(url=self.url)
else:
self.g = reasoner
self.ontology = self.g
Expand Down Expand Up @@ -1192,3 +1193,6 @@ def least_general_named_concepts(self):

def query(self, sparql: str):
    """Lazily yield the items produced by running ``sparql`` on ``self.g``.

    Because this is a generator function, ``self.g.query`` is not invoked
    until the caller starts iterating.

    Args:
        sparql: The SPARQL query string, forwarded as ``sparql_query``.

    Yields:
        Each item of the iterable returned by ``self.g.query``.
    """
    rows = self.g.query(sparql_query=sparql)
    yield from rows

def query_results(self, sparql: str):
    """Run ``sparql`` on ``self.g`` and return the result object unchanged.

    Unlike ``query``, this is not a generator: the call happens immediately
    and the raw return value of ``self.g.query`` is handed back as-is.

    Args:
        sparql: The SPARQL query string, forwarded as ``sparql_query``.

    Returns:
        Whatever ``self.g.query`` returns.
        NOTE(review): callers appear to invoke ``.json()`` on it, so this is
        presumably an HTTP response object — confirm against the reasoner.
    """
    raw_result = self.g.query(sparql_query=sparql)
    return raw_result
20 changes: 20 additions & 0 deletions ontolearn/utils/static_funcs.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,26 @@ def compute_f1_score(individuals, pos, neg) -> float:
return f_1


def compute_f1_score_from_confusion_matrix(confusion_matrix: dict) -> float:
    """Compute the F1 score from the counts of a confusion matrix.

    Args:
        confusion_matrix: Mapping with the keys ``"tp"``, ``"fn"`` and
            ``"fp"``. Each value is passed through ``int()``, so integers and
            integer-valued strings are both accepted. A ``"tn"`` entry may be
            present but is ignored, because F1 does not depend on true
            negatives.

    Returns:
        ``2 * precision * recall / (precision + recall)``, or ``0.0`` when
        recall or precision is undefined (zero denominator) or equal to zero.

    Raises:
        KeyError: If ``"tp"``, ``"fn"`` or ``"fp"`` is missing.
        ValueError: If one of those values cannot be converted by ``int()``.
    """
    tp = int(confusion_matrix["tp"])
    fn = int(confusion_matrix["fn"])
    fp = int(confusion_matrix["fp"])

    # Recall (tp / (tp + fn)) or precision (tp / (tp + fp)) is undefined when
    # its denominator is zero; the score is defined as 0.0 in that case.
    if tp + fn == 0 or tp + fp == 0:
        return 0.0

    recall = tp / (tp + fn)
    precision = tp / (tp + fp)

    # Avoid 0 / 0 in the harmonic mean below.
    if precision == 0 or recall == 0:
        return 0.0

    return 2 * ((precision * recall) / (precision + recall))

def plot_umap_reduced_embeddings(X: pandas.DataFrame, y: List[float], name: str = "umap_visualization.pdf") -> None: # pragma: no cover
# TODO:AB: 'umap' is not part of the dependencies !?
import umap
Expand Down
2 changes: 1 addition & 1 deletion tests/test_example_concept_learning_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def test_learning(self):
0.2,
0.97,
0.1,
0.92,
0.90,
0.4,
0.95,
0.3])):
Expand Down
Loading