
Commit

linting
mcollardanuy committed May 3, 2023
1 parent e0ecbea commit ea74133
Showing 31 changed files with 171 additions and 157 deletions.
2 changes: 1 addition & 1 deletion .dockerignore
@@ -14,4 +14,4 @@ resources/wikidata
tests/
.venv/
.github/
__pycache__/
__pycache__/
49 changes: 24 additions & 25 deletions .github/workflows/ci.yml
@@ -21,33 +21,32 @@ jobs:
uses: actions/setup-python@v1
with:
python-version: ${{ matrix.python-version }}
- name: Bootstrap poetry
shell: bash
run: |
python -m ensurepip
python -m pip install --upgrade pip

- name: Bootstrap poetry
shell: bash
run: |
python -m ensurepip
python -m pip install --upgrade pip
python -m pip install poetry
- name: Configure poetry
shell: bash
run: |
- name: Configure poetry
shell: bash
run: |
python -m poetry config virtualenvs.in-project true
- name: Set up cache
uses: actions/cache@v2
id: cache
with:
path: .venv
- name: Set up cache
uses: actions/cache@v2
id: cache
with:
path: .venv
key: venv-${{ runner.os }}-${{ steps.full-python-version.outputs.version }}-${{ hashFiles('**/poetry.lock') }}
- name: Ensure cache is healthy
if: steps.cache.outputs.cache-hit == 'true'
shell: bash

- name: Ensure cache is healthy
if: steps.cache.outputs.cache-hit == 'true'
shell: bash
run: timeout 10s python -m poetry run pip --version || rm -rf .venv

- name: Install dependencies
shell: bash
run: |
python -m poetry install

- name: Install dependencies
shell: bash
run: |
python -m poetry install
2 changes: 1 addition & 1 deletion README.md
@@ -169,7 +169,7 @@ See this with an example in [this notebook](https://github.com/Living-with-machi

## Installation

If you want to work directly on the code base, we suggest installing T-Res following these instructions (which have been tested on Linux (Ubuntu 20.04)).
If you want to work directly on the code base, we suggest installing T-Res following these instructions (which have been tested on Linux (Ubuntu 20.04)).

### First, update the system

48 changes: 28 additions & 20 deletions app/app_template.py
@@ -1,10 +1,11 @@
import os
import sys
import time
from pathlib import Path
from typing import Union
import time
from fastapi import FastAPI, Request

import uvicorn
from fastapi import FastAPI, Request
from pydantic import BaseModel

if "toponym-resolution" in __file__:
@@ -17,6 +18,7 @@
os.chdir(experiments_path)

from config import CONFIG as pipeline_config

from geoparser import pipeline

geoparser = pipeline.Pipeline(**pipeline_config)
@@ -31,40 +33,46 @@ class APIQuery(BaseModel):
app_config_name = os.environ["APP_CONFIG_NAME"]
app = FastAPI(title=f"Toponym Resolution Pipeline API ({app_config_name})")


@app.get("/")
async def read_root(request: Request):

return {"Title": request.app.title,
"request.url": request.url,
"request.query_params": request.query_params,
"root_path": request.scope.get("root_path"),
"request.client": request.client,
"hostname": os.uname()[1],
"worker_id": os.getpid()
}
return {
"Title": request.app.title,
"request.url": request.url,
"request.query_params": request.query_params,
"root_path": request.scope.get("root_path"),
"request.client": request.client,
"hostname": os.uname()[1],
"worker_id": os.getpid(),
}


@app.get("/test")
async def test_pipeline():
resolved = geoparser.run_sentence(
"Harvey, from London;Thomas and Elizabeth, Barnett.",
place="Manchester",
place_wqid="Q18125",
)

resolved = geoparser.run_sentence("Harvey, from London;Thomas and Elizabeth, Barnett.", place="Manchester", place_wqid="Q18125")

return resolved


@app.get("/toponym_resolution")
async def run_pipeline(api_query: APIQuery, request_id: Union[str, None] = None):

place = "" if api_query.place is None else api_query.place
place_wqid = "" if api_query.place_wqid is None else api_query.place_wqid
resolved = geoparser.run_sentence(api_query.sentence,
place=api_query.place,
place_wqid=api_query.place_wqid)
resolved = geoparser.run_sentence(
api_query.sentence, place=api_query.place, place_wqid=api_query.place_wqid
)

return resolved


@app.get("/health")
async def healthcheck():
return {"status": "ok"}


if __name__=="__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)
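
For reference, the snippet below is a minimal client sketch for the `/toponym_resolution` endpoint defined in this file. The URL and the use of the `requests` package are assumptions (the port matches the `uvicorn.run` call above); the example sentence and place values are taken from the `/test` endpoint.

```python
# Minimal client sketch for the /toponym_resolution endpoint above (assumed
# to be running locally on port 8000, as in the __main__ block). The
# `requests` package is an assumption, not part of this repository.
import requests

payload = {
    "sentence": "Harvey, from London;Thomas and Elizabeth, Barnett.",
    "place": "Manchester",    # optional place of publication
    "place_wqid": "Q18125",   # optional Wikidata ID of that place
}

# The route is declared as GET but reads an APIQuery JSON body.
response = requests.get("http://localhost:8000/toponym_resolution", json=payload)
print(response.json())
```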
5 changes: 3 additions & 2 deletions app/configs/deezy_mostpopular.py
@@ -1,9 +1,10 @@
import os
import sys
from pathlib import Path

print(sys.path)
print(os.getcwd())
from geoparser import ranking, linking
from geoparser import linking, ranking

myranker = ranking.Ranker(
method="deezymatch",
@@ -50,4 +51,4 @@
overwrite_training=False,
)

CONFIG = {"myranker": myranker, "mylinker": mylinker}
CONFIG = {"myranker": myranker, "mylinker": mylinker}
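
As context for these config modules, the sketch below mirrors how `app_template.py` above consumes them: the template Dockerfile further down copies `app/configs/<APP_NAME>.py` to `config.py`, and the app unpacks its `CONFIG` dict into the pipeline constructor. This is a restatement of existing code in the diff, not a new interface.

```python
# Sketch of how a config module's CONFIG dict is consumed (mirrors
# app_template.py, which imports the copied config module as `config`).
from config import CONFIG  # {"myranker": myranker, "mylinker": mylinker}
from geoparser import pipeline

# Equivalent to pipeline.Pipeline(myranker=myranker, mylinker=mylinker)
geoparser = pipeline.Pipeline(**CONFIG)
```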
29 changes: 15 additions & 14 deletions app/configs/deezy_relwithoutpubl.py
@@ -1,7 +1,8 @@
import os
import sys
from pathlib import Path
from geoparser import ranking, linking

from geoparser import linking, ranking

myranker = ranking.Ranker(
method="deezymatch",
@@ -40,18 +41,18 @@
)

mylinker = linking.Linker(
method='reldisamb',
resources_path="../resources/",
linking_resources=dict(),
base_model="to-be-removed", # Base model for vector extraction
rel_params={
"base_path": "../resources/rel_db/",
"wiki_version": "wiki_2019/",
"training_data": "lwm", # lwm, aida
"ranking": 'relv', # relv, publ
"micro_locs": 'nil', # "dist", "nil", ""
},
overwrite_training=False,
method="reldisamb",
resources_path="../resources/",
linking_resources=dict(),
base_model="to-be-removed", # Base model for vector extraction
rel_params={
"base_path": "../resources/rel_db/",
"wiki_version": "wiki_2019/",
"training_data": "lwm", # lwm, aida
"ranking": "relv", # relv, publ
"micro_locs": "nil", # "dist", "nil", ""
},
overwrite_training=False,
)

CONFIG = {"myranker": myranker, "mylinker": mylinker}
CONFIG = {"myranker": myranker, "mylinker": mylinker}
29 changes: 15 additions & 14 deletions app/configs/deezy_relwithpubl.py
@@ -1,7 +1,8 @@
import os
import sys
from pathlib import Path
from geoparser import ranking, linking

from geoparser import linking, ranking

myranker = ranking.Ranker(
method="deezymatch",
@@ -40,18 +41,18 @@
)

mylinker = linking.Linker(
method='reldisamb',
resources_path="../resources/",
linking_resources=dict(),
base_model="to-be-removed", # Base model for vector extraction
rel_params={
"base_path": "../resources/rel_db/",
"wiki_version": "wiki_2019/",
"training_data": "lwm", # lwm, aida
"ranking": 'publ', # relv, publ
"micro_locs": 'nil', # "dist", "nil", ""
},
overwrite_training=False,
method="reldisamb",
resources_path="../resources/",
linking_resources=dict(),
base_model="to-be-removed", # Base model for vector extraction
rel_params={
"base_path": "../resources/rel_db/",
"wiki_version": "wiki_2019/",
"training_data": "lwm", # lwm, aida
"ranking": "publ", # relv, publ
"micro_locs": "nil", # "dist", "nil", ""
},
overwrite_training=False,
)

CONFIG = {"myranker": myranker, "mylinker": mylinker}
CONFIG = {"myranker": myranker, "mylinker": mylinker}
5 changes: 3 additions & 2 deletions app/configs/perfect_mostpopular.py
@@ -1,7 +1,8 @@
import os
import sys
from pathlib import Path
from geoparser import ranking, linking

from geoparser import linking, ranking

myranker = ranking.Ranker(
method="perfectmatch",
@@ -19,4 +20,4 @@
overwrite_training=False,
)

CONFIG = {"myranker": myranker, "mylinker": mylinker}
CONFIG = {"myranker": myranker, "mylinker": mylinker}
2 changes: 1 addition & 1 deletion app/template.Dockerfile
@@ -14,4 +14,4 @@ COPY app/app_template.py /app/app.py
COPY app/configs/${APP_NAME}.py /app/config.py
CMD ["poetry", "run", "uvicorn", "app:app", "--proxy-headers", "--host", "0.0.0.0", "--port", "80", "--workers", "2"]

#TODO: Use variable in Dockerfile
#TODO: Use variable in Dockerfile
4 changes: 2 additions & 2 deletions evaluation/README.md
@@ -1,6 +1,6 @@
# Evaluation

First, clone the [CLEF-HIPE-2020-scorer](https://github.com/impresso/CLEF-HIPE-2020-scorer) to this folder and checkout [this commit](https://github.com/impresso/CLEF-HIPE-2020-scorer/tree/ac5c876eba58065195024cff550c2b5056986f7b) to have the exact same evaluation setting as in our experiments.
First, clone the [CLEF-HIPE-2020-scorer](https://github.com/impresso/CLEF-HIPE-2020-scorer) to this folder and checkout [this commit](https://github.com/impresso/CLEF-HIPE-2020-scorer/tree/ac5c876eba58065195024cff550c2b5056986f7b) to have the exact same evaluation setting as in our experiments.

```
git clone https://github.com/impresso/CLEF-HIPE-2020-scorer.git
@@ -18,4 +18,4 @@ python CLEF-HIPE-2020-scorer/clef_evaluation.py --ref ../experiments/outputs/res
To assess the performance on toponym resolution:
```bash
python CLEF-HIPE-2020-scorer/clef_evaluation.py --ref ../experiments/outputs/results/lwm-true_bundle2_en_1.tsv --pred ../experiments/outputs/results/lwm-pred_bundle2_en_1.tsv --task nel --outdir results/
```
```
2 changes: 1 addition & 1 deletion experiments/README.md
@@ -37,4 +37,4 @@ This script does runs for all different scenarios reported in the experiments in
To evaluate the different approaches and obtain a table with results such as the one provided in the paper, go to the `../evaluation/` directory. There, you should clone the [HIPE scorer](https://github.com/hipe-eval/HIPE-scorer). We are using the code version at commit 50dff4e, and have added the line `return eval_stats` at the end of the `get_results()` function. From `../evaluation/`, run the following script to obtain the results in latex format:
```bash
python display_results.py
```
```
16 changes: 9 additions & 7 deletions experiments/prepare_data.py
@@ -1,15 +1,17 @@
import sys, os
import os
import sys

# Add "../" to path to import utils
sys.path.insert(0, os.path.abspath(os.path.pardir))
import pandas as pd
from pathlib import Path
from utils import get_data
from utils import preprocess_data
from sklearn.model_selection import train_test_split
import random
import json
import os
import random
from pathlib import Path

import pandas as pd
from sklearn.model_selection import train_test_split

from utils import get_data, preprocess_data

RANDOM_SEED = 42

7 changes: 4 additions & 3 deletions experiments/toponym_resolution.py
@@ -1,13 +1,14 @@
import os
import sqlite3
import sys
import pandas as pd
from pathlib import Path
import sqlite3

import pandas as pd

# Add "../" to path to import utils
sys.path.insert(0, os.path.abspath(os.path.pardir))
from geoparser import recogniser, ranking, linking
from experiments import experiment
from geoparser import linking, ranking, recogniser

# Choose test scenario:
# * "dev" while developing and experimenting,
8 changes: 5 additions & 3 deletions geoparser/linking.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import json
import os
import sys
from pathlib import Path

import numpy as np
import pandas as pd
from pathlib import Path
from haversine import haversine
from tqdm import tqdm

@@ -173,7 +174,9 @@ def by_distance(self, dict_mention, origin_wqid=""):
try:
geodist = haversine(origin_coords, cand_coords)
resulting_cands[candidate] = geodist
except ValueError: # We have one candidate with coordinates in Venus!
except (
ValueError
): # We have one candidate with coordinates in Venus!
pass
if geodist < keep_lowest_distance:
keep_lowest_distance = geodist
@@ -206,7 +209,6 @@ def train_load_model(self, myranker, split="originalsplit"):
The DeezyMatch candidate vectors.
"""
if self.method == "reldisamb":

# Generate ED model name:
linker_name = myranker.method
if myranker.method == "deezymatch":
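
As an aside on the `by_distance` hunk above: its core is a haversine comparison between the place of publication and each candidate's coordinates. The standalone sketch below illustrates that loop with made-up candidate names and coordinates; the real method reads coordinates from the linking resources and keys candidates by Wikidata ID.

```python
# Standalone sketch of the distance comparison in Linker.by_distance above.
# Candidate names and coordinates are illustrative only.
from haversine import haversine

origin_coords = (53.48, -2.24)  # place of publication, e.g. Manchester (lat, lon)
candidates = {
    "london_uk": (51.51, -0.13),
    "london_ohio": (39.89, -83.45),
}

resulting_cands = {}
keep_lowest_distance = float("inf")
for candidate, cand_coords in candidates.items():
    try:
        geodist = haversine(origin_coords, cand_coords)  # distance in km
        resulting_cands[candidate] = geodist
        if geodist < keep_lowest_distance:
            keep_lowest_distance = geodist
    except ValueError:  # e.g. out-of-range ("Venus") coordinates
        pass

print(resulting_cands, keep_lowest_distance)
```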
4 changes: 2 additions & 2 deletions geoparser/ranking.py
@@ -1,9 +1,9 @@
import json
import os
import sys
import json
from pathlib import Path

import pandas as pd
from pathlib import Path
from DeezyMatch import candidate_ranker
from pandarallel import pandarallel
from pyxdameraulevenshtein import normalized_damerau_levenshtein_distance
