Skip to content

Commit

Permalink
Merge pull request #226 from Living-with-machines/225-change-emb-db-name
Browse files Browse the repository at this point in the history
Change embeddings DB name
  • Loading branch information
fedenanni authored May 3, 2023
2 parents cd995c8 + 3786528 commit 4a187e9
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 14 deletions.
2 changes: 1 addition & 1 deletion examples/run_pipeline_deezy_reldisamb+wmtops.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
"metadata": {},
"outputs": [],
"source": [
"with sqlite3.connect(\"../resources/rel_db/embedding_database.db\") as conn:\n",
"with sqlite3.connect(\"../resources/rel_db/embeddings_database.db\") as conn:\n",
" cursor = conn.cursor()\n",
" mylinker = linking.Linker(\n",
" method=\"reldisamb\",\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/run_pipeline_deezy_reldisamb+wpubl+wmtops.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
"metadata": {},
"outputs": [],
"source": [
"with sqlite3.connect(\"../resources/rel_db/embedding_database.db\") as conn:\n",
"with sqlite3.connect(\"../resources/rel_db/embeddings_database.db\") as conn:\n",
" cursor = conn.cursor()\n",
" mylinker = linking.Linker(\n",
" method=\"reldisamb\",\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/run_pipeline_deezy_reldisamb+wpubl.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
"metadata": {},
"outputs": [],
"source": [
"with sqlite3.connect(\"../resources/rel_db/embedding_database.db\") as conn:\n",
"with sqlite3.connect(\"../resources/rel_db/embeddings_database.db\") as conn:\n",
" cursor = conn.cursor()\n",
" mylinker = linking.Linker(\n",
" method=\"reldisamb\",\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/run_pipeline_deezy_reldisamb.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
"metadata": {},
"outputs": [],
"source": [
"with sqlite3.connect(\"../resources/rel_db/embedding_database.db\") as conn:\n",
"with sqlite3.connect(\"../resources/rel_db/embeddings_database.db\") as conn:\n",
" cursor = conn.cursor()\n",
" mylinker = linking.Linker(\n",
" method=\"reldisamb\",\n",
Expand Down
2 changes: 1 addition & 1 deletion experiments/toponym_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@

# --------------------------------------
# Instantiate the linker:
with sqlite3.connect("../resources/rel_db/embedding_database.db") as conn:
with sqlite3.connect("../resources/rel_db/embeddings_database.db") as conn:
cursor = conn.cursor()
mylinker = linking.Linker(
method=top_res_method,
Expand Down
22 changes: 15 additions & 7 deletions tests/test_disambiguation.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_embeddings():
"""
# Test 1: Check glove embeddings
mentions = ["in", "apple"]
with sqlite3.connect("resources/rel_db/embedding_database.db") as conn:
with sqlite3.connect("resources/rel_db/embeddings_database.db") as conn:
cursor = conn.cursor()
embs = rel_utils.get_db_emb(cursor, mentions, "snd")
assert len(mentions) == len(embs)
Expand All @@ -47,7 +47,9 @@ def test_embeddings():


def test_prepare_initial_data():
df = pd.read_csv("experiments/outputs/data/lwm/linking_df_split.tsv", sep="\t").iloc[:1]
df = pd.read_csv(
"experiments/outputs/data/lwm/linking_df_split.tsv", sep="\t"
).iloc[:1]
parsed_doc = rel_utils.prepare_initial_data(df, context_len=100)
assert parsed_doc["4939308_1"][0]["mention"] == "STALYBRIDGE"
assert parsed_doc["4939308_1"][0]["gold"][0] == "Q1398653"
Expand Down Expand Up @@ -106,7 +108,7 @@ def test_train():
"do_test": False,
},
)
with sqlite3.connect("resources/rel_db/embedding_database.db") as conn:
with sqlite3.connect("resources/rel_db/embeddings_database.db") as conn:
cursor = conn.cursor()

mylinker = linking.Linker(
Expand Down Expand Up @@ -148,7 +150,10 @@ def test_train():
# candidates to the training set):
mylinker.rel_params["ed_model"] = mylinker.train_load_model(myranker)

assert type(mylinker.rel_params["ed_model"]) == entity_disambiguation.EntityDisambiguation
assert (
type(mylinker.rel_params["ed_model"])
== entity_disambiguation.EntityDisambiguation
)

# assert expected performance on test set
assert mylinker.rel_params["ed_model"].best_performance["f1"] == 0.6583541147132169
Expand Down Expand Up @@ -206,7 +211,7 @@ def test_load_eval_model():
},
)

with sqlite3.connect("resources/rel_db/embedding_database.db") as conn:
with sqlite3.connect("resources/rel_db/embeddings_database.db") as conn:
cursor = conn.cursor()

mylinker = linking.Linker(
Expand Down Expand Up @@ -249,7 +254,10 @@ def test_load_eval_model():
# candidates to the training set):
mylinker.rel_params["ed_model"] = mylinker.train_load_model(myranker)

assert type(mylinker.rel_params["ed_model"]) == entity_disambiguation.EntityDisambiguation
assert (
type(mylinker.rel_params["ed_model"])
== entity_disambiguation.EntityDisambiguation
)


def test_predict():
Expand Down Expand Up @@ -303,7 +311,7 @@ def test_predict():
"do_test": False,
},
)
with sqlite3.connect("resources/rel_db/embedding_database.db") as conn:
with sqlite3.connect("resources/rel_db/embeddings_database.db") as conn:
cursor = conn.cursor()

mylinker = linking.Linker(
Expand Down
4 changes: 2 additions & 2 deletions tests/test_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def test_deezy_rel_wpubl_wmtops():
},
)

with sqlite3.connect("resources/rel_db/embedding_database.db") as conn:
with sqlite3.connect("resources/rel_db/embeddings_database.db") as conn:
cursor = conn.cursor()
mylinker = linking.Linker(
method="reldisamb",
Expand Down Expand Up @@ -238,7 +238,7 @@ def test_perfect_rel_wpubl_wmtops():
},
)

with sqlite3.connect("resources/rel_db/embedding_database.db") as conn:
with sqlite3.connect("resources/rel_db/embeddings_database.db") as conn:
cursor = conn.cursor()
mylinker = linking.Linker(
method="reldisamb",
Expand Down
12 changes: 12 additions & 0 deletions utils/REL/entity_disambiguation.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,18 @@ def __init__(self, db_embs, user_config, reset_embeddings=False):
assert (
test is not None
), "DB embeddings in wrong folder..? Test embedding not found.."
test = rel_utils.get_db_emb(self.db_embs, ["#ENTITY/UNK#"], "entity")[0]
assert (
test is not None
), "DB embeddings in wrong folder..? Test embedding not found.."
test = rel_utils.get_db_emb(self.db_embs, ["#WORD/UNK#"], "word")[0]
assert (
test is not None
), "DB embeddings in wrong folder..? Test embedding not found.."
test = rel_utils.get_db_emb(self.db_embs, ["#SND/UNK#"], "snd")[0]
assert (
test is not None
), "DB embeddings in wrong folder..? Test embedding not found.."

# Initialise embedding dictionary:
self.__load_embeddings()
Expand Down

0 comments on commit 4a187e9

Please sign in to comment.