Skip to content

Commit

Permalink
fix: NA county_code is valid, not NaN
Browse files Browse the repository at this point in the history
  • Loading branch information
AlessandroLorenzi committed May 4, 2023
1 parent 3e3235b commit f33cd61
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 2 deletions.
23 changes: 21 additions & 2 deletions pgeocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,26 @@
"ZA",
]

# Strings that should be parsed as missing values (NaN) when the cached
# country data file is read back with pd.read_csv (passed as `na_values`
# in _get_data).
# NOTE(review): pandas appends `na_values` to its built-in defaults unless
# `keep_default_na=False` is also passed -- confirm the read_csv call
# disables the defaults, otherwise "NA" is presumably still parsed as NaN.
NA_VALUES = [
"",
"#N/A",
"#N/A N/A",
"#NA",
"-1.#IND",
"-1.#QNAN",
"-NaN",
"-nan",
"1.#IND",
"1.#QNAN",
"<NA>",
"N/A",
# "NA" is intentionally omitted: it is a valid county_code
# (e.g. Napoli, Italy), not a missing value.
"NULL",
"NaN",
"n/a",
"nan",
"null",
]

@contextlib.contextmanager
def _open_extract_url(url: str, country: str) -> Any:
Expand Down Expand Up @@ -231,7 +251,7 @@ def _get_data(country: str) -> Tuple[str, pd.DataFrame]:

data_path = os.path.join(STORAGE_DIR, country.upper() + ".txt")
if os.path.exists(data_path):
data = pd.read_csv(data_path, dtype={"postal_code": str})
data = pd.read_csv(data_path, dtype={"postal_code": str}, na_values=NA_VALUES)
else:
download_urls = [
val.format(country=country) for val in DOWNLOAD_URL
Expand All @@ -246,7 +266,6 @@ def _get_data(country: str) -> Tuple[str, pd.DataFrame]:
)
os.makedirs(STORAGE_DIR, exist_ok=True)
data.to_csv(data_path, index=None)

return data_path, data

def _index_postal_codes(self) -> pd.DataFrame:
Expand Down
6 changes: 6 additions & 0 deletions test_pgeocode.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,12 @@ def test_query_location_exact():
assert isinstance(res, pd.DataFrame)
assert res["state_name"].unique().tolist() == ["Île-de-France"]

def test_location_naples():
    """Regression test: the "NA" county code of Napoli must survive loading.

    The county code for Napoli is the literal string "NA", which must not
    be turned into a missing value when the Italian data set is read.
    See https://github.com/symerio/pgeocode/issues/73
    """
    naples = Nominatim("it").query_location("Napoli")

    # Every matching row must share one county name and one county code.
    expected_by_column = (
        ("county_name", ["Napoli"]),
        ("county_code", ["NA"]),
    )
    for column, expected in expected_by_column:
        assert naples[column].unique().tolist() == expected

def test_query_location_fuzzy():
pytest.importorskip("thefuzz")
Expand Down

0 comments on commit f33cd61

Please sign in to comment.