
Commit

linting
mcollardanuy committed May 3, 2023
1 parent e0ecbea commit ea74133
Showing 31 changed files with 171 additions and 157 deletions.
2 changes: 1 addition & 1 deletion .dockerignore
@@ -14,4 +14,4 @@ resources/wikidata
tests/
.venv/
.github/
__pycache__/
__pycache__/
49 changes: 24 additions & 25 deletions .github/workflows/ci.yml
@@ -21,33 +21,32 @@ jobs:
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
- name: Bootstrap poetry
shell: bash
run: |
python -m ensurepip
python -m pip install --upgrade pip

- name: Bootstrap poetry
shell: bash
run: |
python -m ensurepip
python -m pip install --upgrade pip
python -m pip install poetry
- name: Configure poetry
shell: bash
run: |
- name: Configure poetry
shell: bash
run: |
python -m poetry config virtualenvs.in-project true
- name: Set up cache
uses: actions/cache@v2
id: cache
with:
path: .venv
- name: Set up cache
uses: actions/cache@v2
id: cache
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }}
- name: Ensure cache is healthy
if: steps.cache.outputs.cache-hit == 'true'
shell: bash

- name: Ensure cache is healthy
if: steps.cache.outputs.cache-hit == 'true'
shell: bash
run: timeout 10s python -m poetry run pip --version || rm -rf .venv

- name: Install dependencies
shell: bash
run: |
python -m poetry install

- name: Install dependencies
shell: bash
run: |
python -m poetry install
2 changes: 1 addition & 1 deletion README.md
@@ -169,7 +169,7 @@ See this with an example in [this notebook](https://github.com/Living-with-machi

## Installation

If you want to work directly on the code base, we suggest installing T-Res following these instructions (which have been tested on Linux (Ubuntu 20.04)).
If you want to work directly on the code base, we suggest installing T-Res following these instructions (which have been tested on Linux (Ubuntu 20.04)).

### First, update the system

48 changes: 28 additions & 20 deletions app/app_template.py
@@ -1,10 +1,11 @@
import os
import sys
import time
from pathlib import Path
from typing import Union
import time
from fastapi import FastAPI, Request

import uvicorn
from fastapi import FastAPI, Request
from pydantic import BaseModel

if "toponym-resolution" in __file__:
@@ -17,6 +18,7 @@
os.chdir(experiments_path)

from config import CONFIG as pipeline_config

from geoparser import pipeline

geoparser = pipeline.Pipeline(**pipeline_config)
@@ -31,40 +33,46 @@ class APIQuery(BaseModel):
app_config_name = os.environ["APP_CONFIG_NAME"]
app = FastAPI(title=f"Toponym Resolution Pipeline API ({app_config_name})")


@app.get("/")
async def read_root(request: Request):

return {"Title": request.app.title,
"request.url": request.url,
"request.query_params": request.query_params,
"root_path": request.scope.get("root_path"),
"request.client": request.client,
"hostname": os.uname()[1],
"worker_id": os.getpid()
}
return {
"Title": request.app.title,
"request.url": request.url,
"request.query_params": request.query_params,
"root_path": request.scope.get("root_path"),
"request.client": request.client,
"hostname": os.uname()[1],
"worker_id": os.getpid(),
}


@app.get("/test")
async def test_pipeline():
resolved = geoparser.run_sentence(
"Harvey, from London;Thomas and Elizabeth, Barnett.",
place="Manchester",
place_wqid="Q18125",
)

resolved = geoparser.run_sentence("Harvey, from London;Thomas and Elizabeth, Barnett.", place="Manchester", place_wqid="Q18125")

return resolved


@app.get("/toponym_resolution")
async def run_pipeline(api_query: APIQuery, request_id: Union[str, None] = None):

place = "" if api_query.place is None else api_query.place
place_wqid = "" if api_query.place_wqid is None else api_query.place_wqid
resolved = geoparser.run_sentence(api_query.sentence,
place=api_query.place,
place_wqid=api_query.place_wqid)
resolved = geoparser.run_sentence(
api_query.sentence, place=api_query.place, place_wqid=api_query.place_wqid
)

return resolved


@app.get("/health")
async def healthcheck():
return {"status": "ok"}


if __name__=="__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)
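
For reference, the snippet below is a minimal client sketch for the `/toponym_resolution` endpoint defined in this file. The URL and the use of the `requests` package are assumptions (the port matches the `uvicorn.run` call above); the example sentence and place values are taken from the `/test` endpoint.

```python
# Minimal client sketch for the /toponym_resolution endpoint above (assumed
# to be running locally on port 8000, as in the __main__ block). The
# `requests` package is an assumption, not part of this repository.
import requests

payload = {
    "sentence": "Harvey, from London;Thomas and Elizabeth, Barnett.",
    "place": "Manchester",    # optional place of publication
    "place_wqid": "Q18125",   # optional Wikidata ID of that place
}

# The route is declared as GET but reads an APIQuery JSON body.
response = requests.get("http://localhost:8000/toponym_resolution", json=payload)
print(response.json())
```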
5 changes: 3 additions & 2 deletions app/configs/deezy_mostpopular.py
@@ -1,9 +1,10 @@
import os
import sys
from pathlib import Path

print(sys.path)
print(os.getcwd())
from geoparser import ranking, linking
from geoparser import linking, ranking

myranker = ranking.Ranker(
method="deezymatch",
@@ -50,4 +51,4 @@
overwrite_training=False,
)

CONFIG = {"myranker": myranker, "mylinker": mylinker}
CONFIG = {"myranker": myranker, "mylinker": mylinker}
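
As context for these config modules, the sketch below mirrors how `app_template.py` above consumes them: the template Dockerfile further down copies `app/configs/<APP_NAME>.py` to `config.py`, and the app unpacks its `CONFIG` dict into the pipeline constructor. This is a restatement of existing code in the diff, not a new interface.

```python
# Sketch of how a config module's CONFIG dict is consumed (mirrors
# app_template.py, which imports the copied config module as `config`).
from config import CONFIG  # {"myranker": myranker, "mylinker": mylinker}
from geoparser import pipeline

# Equivalent to pipeline.Pipeline(myranker=myranker, mylinker=mylinker)
geoparser = pipeline.Pipeline(**CONFIG)
```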
29 changes: 15 additions & 14 deletions app/configs/deezy_relwithoutpubl.py
@@ -1,7 +1,8 @@
import os
import sys
from pathlib import Path
from geoparser import ranking, linking

from geoparser import linking, ranking

myranker = ranking.Ranker(
method="deezymatch",
@@ -40,18 +41,18 @@
)

mylinker = linking.Linker(
method='reldisamb',
resources_path="../resources/",
linking_resources=dict(),
base_model="to-be-removed", # Base model for vector extraction
rel_params={
"base_path": "../resources/rel_db/",
"wiki_version": "wiki_2019/",
"training_data": "lwm", # lwm, aida
"ranking": 'relv', # relv, publ
"micro_locs": 'nil', # "dist", "nil", ""
},
overwrite_training=False,
method="reldisamb",
resources_path="../resources/",
linking_resources=dict(),
base_model="to-be-removed", # Base model for vector extraction
rel_params={
"base_path": "../resources/rel_db/",
"wiki_version": "wiki_2019/",
"training_data": "lwm", # lwm, aida
"ranking": "relv", # relv, publ
"micro_locs": "nil", # "dist", "nil", ""
},
overwrite_training=False,
)

CONFIG = {"myranker": myranker, "mylinker": mylinker}
CONFIG = {"myranker": myranker, "mylinker": mylinker}
29 changes: 15 additions & 14 deletions app/configs/deezy_relwithpubl.py
@@ -1,7 +1,8 @@
import os
import sys
from pathlib import Path
from geoparser import ranking, linking

from geoparser import linking, ranking

myranker = ranking.Ranker(
method="deezymatch",
@@ -40,18 +41,18 @@
)

mylinker = linking.Linker(
method='reldisamb',
resources_path="../resources/",
linking_resources=dict(),
base_model="to-be-removed", # Base model for vector extraction
rel_params={
"base_path": "../resources/rel_db/",
"wiki_version": "wiki_2019/",
"training_data": "lwm", # lwm, aida
"ranking": 'publ', # relv, publ
"micro_locs": 'nil', # "dist", "nil", ""
},
overwrite_training=False,
method="reldisamb",
resources_path="../resources/",
linking_resources=dict(),
base_model="to-be-removed", # Base model for vector extraction
rel_params={
"base_path": "../resources/rel_db/",
"wiki_version": "wiki_2019/",
"training_data": "lwm", # lwm, aida
"ranking": "publ", # relv, publ
"micro_locs": "nil", # "dist", "nil", ""
},
overwrite_training=False,
)

CONFIG = {"myranker": myranker, "mylinker": mylinker}
CONFIG = {"myranker": myranker, "mylinker": mylinker}
5 changes: 3 additions & 2 deletions app/configs/perfect_mostpopular.py
@@ -1,7 +1,8 @@
import os
import sys
from pathlib import Path
from geoparser import ranking, linking

from geoparser import linking, ranking

myranker = ranking.Ranker(
method="perfectmatch",
@@ -19,4 +20,4 @@
overwrite_training=False,
)

CONFIG = {"myranker": myranker, "mylinker": mylinker}
CONFIG = {"myranker": myranker, "mylinker": mylinker}
2 changes: 1 addition & 1 deletion app/template.Dockerfile
@@ -14,4 +14,4 @@ COPY app/app_template.py /app/app.py
COPY app/configs/${APP_NAME}.py /app/config.py
CMD ["poetry", "run", "uvicorn", "app:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "80", "--workers", "2"]

#TODO: Use variable in Dockerfile
#TODO: Use variable in Dockerfile
4 changes: 2 additions & 2 deletions evaluation/README.md
@@ -1,6 +1,6 @@
# Evaluation

First, clone the [CLEF-HIPE-2020-scorer](https://github.com/impresso/CLEF-HIPE-2020-scorer) to this folder and checkout [this commit](https://github.com/impresso/CLEF-HIPE-2020-scorer/tree/ac5c876eba58065195024cff550c2b5056986f7b) to have the exact same evaluation setting as in our experiments.
First, clone the [CLEF-HIPE-2020-scorer](https://github.com/impresso/CLEF-HIPE-2020-scorer) to this folder and checkout [this commit](https://github.com/impresso/CLEF-HIPE-2020-scorer/tree/ac5c876eba58065195024cff550c2b5056986f7b) to have the exact same evaluation setting as in our experiments.

```
git clone https://github.com/impresso/CLEF-HIPE-2020-scorer.git
@@ -18,4 +18,4 @@ python CLEF-HIPE-2020-scorer/clef_evaluation.py --ref ../experiments/outputs/res
To assess the performance on toponym resolution:
```bash
python CLEF-HIPE-2020-scorer/clef_evaluation.py --ref ../experiments/outputs/results/lwm-true_bundle2_en_1.tsv --pred ../experiments/outputs/results/lwm-pred_bundle2_en_1.tsv --task nel --outdir results/
```
```
2 changes: 1 addition & 1 deletion experiments/README.md
@@ -37,4 +37,4 @@ This script does runs for all different scenarios reported in the experiments in
To evaluate the different approaches and obtain a table with results such as the one provided in the paper, go to the `../evaluation/` directory. There, you should clone the [HIPE scorer](https://github.com/hipe-eval/HIPE-scorer). We are using the code version at commit 50dff4e, and have added the line `return eval_stats` at the end of the `get_results()` function. From `../evaluation/`, run the following script to obtain the results in latex format:
```bash
python display_results.py
```
```
16 changes: 9 additions & 7 deletions experiments/prepare_data.py
@@ -1,15 +1,17 @@
import sys, os
import os
import sys

# Add "../" to path to import utils
sys.path.insert(0, os.path.abspath(os.path.pardir))
import pandas as pd
from pathlib import Path
from utils import get_data
from utils import preprocess_data
from sklearn.model_selection import train_test_split
import random
import json
import os
import random
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split

from utils import get_data, preprocess_data

RANDOM_SEED = 42

7 changes: 4 additions & 3 deletions experiments/toponym_resolution.py
@@ -1,13 +1,14 @@
import os
import sqlite3
import sys
import pandas as pd
from pathlib import Path
import sqlite3

import pandas as pd

# Add "../" to path to import utils
sys.path.insert(0, os.path.abspath(os.path.pardir))
from geoparser import recogniser, ranking, linking
from experiments import experiment
from geoparser import linking, ranking, recogniser

# Choose test scenario:
# * "dev" while developing and experimenting,
8 changes: 5 additions & 3 deletions geoparser/linking.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
from pathlib import Path
from haversine import haversine
from tqdm import tqdm

@@ -173,7 +174,9 @@ def by_distance(self, dict_mention, origin_wqid=""):
try:
geodist = haversine(origin_coords, cand_coords)
resulting_cands[candidate] = geodist
except ValueError: # We have one candidate with coordinates in Venus!
except (
ValueError
): # We have one candidate with coordinates in Venus!
pass
if geodist < keep_lowest_distance:
keep_lowest_distance = geodist
@@ -206,7 +209,6 @@ def train_load_model(self, myranker, split="originalsplit"):
The DeezyMatch candidate vectors.
"""
if self.method == "reldisamb":

# Generate ED model name:
linker_name = myranker.method
if myranker.method == "deezymatch":
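
As an aside on the `by_distance` hunk above: its core is a haversine comparison between the place of publication and each candidate's coordinates. The standalone sketch below illustrates that loop with made-up candidate names and coordinates; the real method reads coordinates from the linking resources and keys candidates by Wikidata ID.

```python
# Standalone sketch of the distance comparison in Linker.by_distance above.
# Candidate names and coordinates are illustrative only.
from haversine import haversine

origin_coords = (53.48, -2.24)  # place of publication, e.g. Manchester (lat, lon)
candidates = {
    "london_uk": (51.51, -0.13),
    "london_ohio": (39.89, -83.45),
}

resulting_cands = {}
keep_lowest_distance = float("inf")
for candidate, cand_coords in candidates.items():
    try:
        geodist = haversine(origin_coords, cand_coords)  # distance in km
        resulting_cands[candidate] = geodist
        if geodist < keep_lowest_distance:
            keep_lowest_distance = geodist
    except ValueError:  # e.g. out-of-range ("Venus") coordinates
        pass

print(resulting_cands, keep_lowest_distance)
```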
4 changes: 2 additions & 2 deletions geoparser/ranking.py
@@ -1,9 +1,9 @@
import json
import os
import sys
import json
from pathlib import Path

import pandas as pd
from pathlib import Path
from DeezyMatch import candidate_ranker
from pandarallel import pandarallel
from pyxdameraulevenshtein import normalized_damerau_levenshtein_distance
