Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions src/multisafepay/util/address_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Copyright (c) MultiSafepay, Inc. All rights reserved.

# This file is licensed under the Open Software License (OSL) version 3.0.
# For a copy of the license, see the LICENSE.txt file in the project root.

# See the DISCLAIMER.md file for disclaimer details.

import re
from typing import List


class AddressParser:
    """
    Class AddressParser.

    Parses and splits up an address in street and house number. Both
    "street first" ("Kraanspoor 39") and "house number first"
    ("10 Downing Street") notations are recognised.
    """

    # House number first: a run of digits followed by zero or more
    # space-separated words consisting of ASCII letters only. Handling this
    # with its own pattern makes single-digit numbers ("5 Main Street") work.
    _LEADING_NUMBER = re.compile(r"(\d+)((?:\s[A-Za-z]+)*)$")

    # Street first: street, house number (digits plus any directly attached
    # non-space characters) and an optional letter-only extension.
    # The (?<!\d) look-behind forbids starting the house number in the middle
    # of a digit run, so "10a Downing Street" is never split into "1" and
    # "0a Downing Street".
    _TRAILING_NUMBER = re.compile(
        r"(.+?)\s?(?<!\d)(\d+\S*)((?:\s?[A-Za-z])*?)$",
    )

    def parse(
        self: "AddressParser",
        address1: str,
        address2: str = "",
    ) -> List[str]:
        """
        Parses and splits up an address in street and house number.

        Args:
        ----
            address1 (str): Primary address line
            address2 (str): Secondary address line (optional). ``None`` is
                treated the same as an empty string.

        Returns:
        -------
            List[str]: [street, house_number] where street is the street name
                and house_number is the house number with any extensions.
                When no house number can be identified the result is
                [full_address, ""].

        """
        # Join both lines; a missing line must not show up as the text "None"
        full_address = f"{address1 or ''} {address2 or ''}".strip()

        # Turn multiple whitespaces into one single whitespace
        full_address = re.sub(r"\s+", " ", full_address)

        # "<number> <street words>": the whole digit run is the house number
        leading = self._LEADING_NUMBER.match(full_address)
        if leading:
            return [leading.group(2).strip(), leading.group(1)]

        # "<street> <number>[ <extension>]"
        trailing = self._TRAILING_NUMBER.match(full_address)
        if not trailing:
            return [full_address, ""]

        return self.extract_street_and_apartment(
            trailing.group(1) or "",
            trailing.group(2) or "",
            trailing.group(3) or "",
        )

    def extract_street_and_apartment(
        self: "AddressParser",
        group1: str,
        group2: str,
        group3: str,
    ) -> List[str]:
        """
        Extract the street and apartment from the matched RegEx results.

        If group1 and group2 both consist of digits only, they are joined
        together as the apartment and group3 is returned as the street.
        In every other case group1 is returned as the street and group2 and
        group3 are joined together as the apartment (house number and
        extension).

        Args:
        ----
            group1 (str): First captured group from regex
            group2 (str): Second captured group from regex
            group3 (str): Third captured group from regex

        Returns:
        -------
            List[str]: [street, apartment] where street is the street name
                and apartment is the house number with extensions. Both
                values are stripped of surrounding whitespace.

        """
        if group1.isdigit() and group2.isdigit():
            return [group3.strip(), f"{group1}{group2}".strip()]

        return [group1.strip(), f"{group2}{group3}".strip()]
208 changes: 208 additions & 0 deletions tests/multisafepay/unit/util/test_unit_address_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
# Copyright (c) MultiSafepay, Inc. All rights reserved.

# This file is licensed under the Open Software License (OSL) version 3.0.
# For a copy of the license, see the LICENSE.txt file in the project root.

# See the DISCLAIMER.md file for disclaimer details.

import pytest
from multisafepay.util.address_parser import AddressParser


class TestAddressParser:
    """Parametrized unit tests covering ``AddressParser.parse``."""

    @pytest.mark.parametrize(
        ("address1", "address2", "expected_street", "expected_apartment"),
        [
            # Street followed by the house number, with assorted whitespace
            ("Kraanspoor", "39", "Kraanspoor", "39"),
            ("Kraanspoor ", "39", "Kraanspoor", "39"),
            ("Kraanspoor 39", "", "Kraanspoor", "39"),
            ("Kraanspoor 39 ", "", "Kraanspoor", "39"),
            ("Kraanspoor", "39 ", "Kraanspoor", "39"),
            ("Kraanspoor39", "", "Kraanspoor", "39"),
            ("Kraanspoor39c", "", "Kraanspoor", "39c"),
            # Street names that contain digits themselves
            ("laan 1933 2", "", "laan 1933", "2"),
            ("laan 1933", "2", "laan 1933", "2"),
            ("18 septemberplein 12", "", "18 septemberplein", "12"),
            ("18 septemberplein", "12", "18 septemberplein", "12"),
            # House numbers carrying an extension
            ("kerkstraat 42-f3", "", "kerkstraat", "42-f3"),
            ("kerkstraat", "42-f3", "kerkstraat", "42-f3"),
            ("Kerk straat 2b", "", "Kerk straat", "2b"),
            ("Kerk straat", "2b", "Kerk straat", "2b"),
            (
                "1e constantijn huigensstraat 1b",
                "",
                "1e constantijn huigensstraat",
                "1b",
            ),
            (
                "1e constantijn huigensstraat",
                "1b",
                "1e constantijn huigensstraat",
                "1b",
            ),
            ("Heuvel, 2a", "", "Heuvel,", "2a"),
            ("1e Jan van Kraanspoor", "2", "1e Jan van Kraanspoor", "2"),
            ("Neherkade 1 XI", "", "Neherkade", "1 XI"),
            ("Kamp 20 38", "", "Kamp 20", "38"),
            # House number written in front of the street
            ("2065 Rue de la Gare", "", "Rue de la Gare", "2065"),
            ("10 Downing Street", "", "Downing Street", "10"),
            ("27", "Alexander Road", "Alexander Road", "27"),
            ("15 Sullivan", "", "Sullivan", "15"),
            ("110 Kraanspoor", "", "Kraanspoor", "110"),
            # No house number at all
            ("Plaza Callao s/n", "", "Plaza Callao s/n", ""),
        ],
    )
    def test_parse_addresses_from_data_provider(
        self: "TestAddressParser",
        address1: str,
        address2: str,
        expected_street: str,
        expected_apartment: str,
    ) -> None:
        """
        Check that parse splits every provided address as expected.

        Args:
        ----
            address1: Primary address line
            address2: Secondary address line
            expected_street: Street name the parser should return
            expected_apartment: Apartment/house number the parser should return

        """
        parsed = AddressParser().parse(address1, address2)

        # Index 0 holds the street, index 1 the apartment/house number
        street_message = (
            f"Street mismatch: expected '{expected_street}', got '{parsed[0]}'"
        )
        assert parsed[0] == expected_street, street_message

        apartment_message = (
            f"Apartment mismatch: expected '{expected_apartment}', "
            f"got '{parsed[1]}'"
        )
        assert parsed[1] == expected_apartment, apartment_message