Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion src/rapidfuzz/fuzz.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
from __future__ import annotations

from collections.abc import Hashable, Sequence
from typing import Callable, TypeVar, overload
from typing import Callable, Literal, TypeVar, overload

from rapidfuzz.distance import ScoreAlignment

# Type variables that appear in the argument type of the ``processor``
# callable in the overloads below.
_UnprocessedType1 = TypeVar("_UnprocessedType1")
_UnprocessedType2 = TypeVar("_UnprocessedType2")
# Values accepted by the ``scorer`` keyword in the overloads that declare it.
_ScorerType = Literal["indel", "levenshtein"]

@overload
def ratio(
Expand All @@ -34,6 +35,7 @@ def partial_ratio(
*,
processor: None = None,
score_cutoff: float | None = 0,
scorer: _ScorerType = "indel",
) -> float: ...
@overload
def partial_ratio(
Expand All @@ -42,6 +44,7 @@ def partial_ratio(
*,
processor: Callable[[_UnprocessedType1 | _UnprocessedType2], Sequence[Hashable]],
score_cutoff: float | None = 0,
scorer: _ScorerType = "indel",
) -> float: ...
@overload
def partial_ratio_alignment(
Expand All @@ -50,6 +53,7 @@ def partial_ratio_alignment(
*,
processor: None = None,
score_cutoff: float | None = 0,
scorer: _ScorerType = "indel",
) -> ScoreAlignment | None: ...
@overload
def partial_ratio_alignment(
Expand All @@ -58,6 +62,7 @@ def partial_ratio_alignment(
*,
processor: Callable[[_UnprocessedType1 | _UnprocessedType2], Sequence[Hashable]],
score_cutoff: float | None = 0,
scorer: _ScorerType = "indel",
) -> ScoreAlignment | None: ...
@overload
def token_sort_ratio(
Expand Down Expand Up @@ -114,6 +119,7 @@ def partial_token_sort_ratio(
*,
processor: None = None,
score_cutoff: float | None = 0,
scorer: _ScorerType = "indel",
) -> float: ...
@overload
def partial_token_sort_ratio(
Expand All @@ -122,6 +128,7 @@ def partial_token_sort_ratio(
*,
processor: Callable[[_UnprocessedType1 | _UnprocessedType2], Sequence[Hashable]],
score_cutoff: float | None = 0,
scorer: _ScorerType = "indel",
) -> float: ...
@overload
def partial_token_set_ratio(
Expand All @@ -130,6 +137,7 @@ def partial_token_set_ratio(
*,
processor: None = None,
score_cutoff: float | None = 0,
scorer: _ScorerType = "indel",
) -> float: ...
@overload
def partial_token_set_ratio(
Expand All @@ -138,6 +146,7 @@ def partial_token_set_ratio(
*,
processor: Callable[[_UnprocessedType1 | _UnprocessedType2], Sequence[Hashable]],
score_cutoff: float | None = 0,
scorer: _ScorerType = "indel",
) -> float: ...
@overload
def partial_token_ratio(
Expand All @@ -146,6 +155,7 @@ def partial_token_ratio(
*,
processor: None = None,
score_cutoff: float | None = 0,
scorer: _ScorerType = "indel",
) -> float: ...
@overload
def partial_token_ratio(
Expand All @@ -154,6 +164,7 @@ def partial_token_ratio(
*,
processor: Callable[[_UnprocessedType1 | _UnprocessedType2], Sequence[Hashable]],
score_cutoff: float | None = 0,
scorer: _ScorerType = "indel",
) -> float: ...
@overload
def WRatio(
Expand All @@ -162,6 +173,7 @@ def WRatio(
*,
processor: None = None,
score_cutoff: float | None = 0,
scorer: _ScorerType = "indel",
) -> float: ...
@overload
def WRatio(
Expand All @@ -170,6 +182,7 @@ def WRatio(
*,
processor: Callable[[_UnprocessedType1 | _UnprocessedType2], Sequence[Hashable]],
score_cutoff: float | None = 0,
scorer: _ScorerType = "indel",
) -> float: ...
@overload
def QRatio(
Expand Down
36 changes: 30 additions & 6 deletions src/rapidfuzz/fuzz_cpp.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -94,26 +94,34 @@ def ratio(s1, s2, *, processor=None, score_cutoff=None):
return ratio_func(s1_proc.string, s2_proc.string, c_score_cutoff)


def partial_ratio(s1, s2, *, processor=None, score_cutoff=None):
def partial_ratio(s1, s2, *, processor=None, score_cutoff=None, scorer="indel"):
    """Score ``s1`` against ``s2`` with the partial-ratio scorer.

    Returns 0 when ``is_none`` reports either input as missing. If ``scorer``
    is neither ``None`` nor ``"indel"`` the call is forwarded, with the
    caller's original arguments, to ``fuzz_py.partial_ratio``. Otherwise the
    inputs go through ``preprocess_strings`` (using ``processor``) and are
    scored by ``partial_ratio_func``; a ``score_cutoff`` of ``None`` is passed
    on as 0.0.
    """
    cdef RF_StringWrapper lhs, rhs
    cdef double cutoff = 0.0
    # Convert the cutoff up front so a bad value fails before any other work.
    if score_cutoff is not None:
        cutoff = score_cutoff

    setupPandas()
    if is_none(s1) or is_none(s2):
        return 0

    # Only the indel scorer is handled natively here; everything else is
    # delegated to the Python implementation.
    delegate_to_python = scorer is not None and scorer != "indel"
    if delegate_to_python:
        return fuzz_py.partial_ratio(
            s1,
            s2,
            processor=processor,
            score_cutoff=score_cutoff,
            scorer=scorer,
        )

    preprocess_strings(s1, s2, processor, &lhs, &rhs)
    return partial_ratio_func(lhs.string, rhs.string, cutoff)


def partial_ratio_alignment(s1, s2, *, processor=None, score_cutoff=None):
def partial_ratio_alignment(s1, s2, *, processor=None, score_cutoff=None, scorer="indel"):
cdef double c_score_cutoff = 0.0 if score_cutoff is None else score_cutoff
cdef RF_StringWrapper s1_proc, s2_proc

setupPandas()
if is_none(s1) or is_none(s2):
return None

# For non-indel scorers, fall back to Python implementation
if scorer is not None and scorer != "indel":
return fuzz_py.partial_ratio_alignment(s1, s2, processor=processor, score_cutoff=score_cutoff, scorer=scorer)

preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc)
res = partial_ratio_alignment_func(s1_proc.string, s2_proc.string, c_score_cutoff)

Expand Down Expand Up @@ -159,50 +167,66 @@ def token_ratio(s1, s2, *, processor=None, score_cutoff=None):
return token_ratio_func(s1_proc.string, s2_proc.string, c_score_cutoff)


def partial_token_sort_ratio(s1, s2, *, processor=None, score_cutoff=None):
def partial_token_sort_ratio(s1, s2, *, processor=None, score_cutoff=None, scorer="indel"):
    """Score ``s1`` against ``s2`` with the partial token-sort scorer.

    Returns 0 when ``is_none`` reports either input as missing. If ``scorer``
    is neither ``None`` nor ``"indel"`` the call is forwarded, with the
    caller's original arguments, to ``fuzz_py.partial_token_sort_ratio``.
    Otherwise the inputs go through ``preprocess_strings`` (using
    ``processor``) and are scored by ``partial_token_sort_ratio_func``; a
    ``score_cutoff`` of ``None`` is passed on as 0.0.
    """
    cdef RF_StringWrapper lhs, rhs
    cdef double cutoff = 0.0
    # Convert the cutoff up front so a bad value fails before any other work.
    if score_cutoff is not None:
        cutoff = score_cutoff

    setupPandas()
    if is_none(s1) or is_none(s2):
        return 0

    # Only the indel scorer is handled natively here; everything else is
    # delegated to the Python implementation.
    delegate_to_python = scorer is not None and scorer != "indel"
    if delegate_to_python:
        return fuzz_py.partial_token_sort_ratio(
            s1,
            s2,
            processor=processor,
            score_cutoff=score_cutoff,
            scorer=scorer,
        )

    preprocess_strings(s1, s2, processor, &lhs, &rhs)
    return partial_token_sort_ratio_func(lhs.string, rhs.string, cutoff)


def partial_token_set_ratio(s1, s2, *, processor=None, score_cutoff=None):
def partial_token_set_ratio(s1, s2, *, processor=None, score_cutoff=None, scorer="indel"):
    """Score ``s1`` against ``s2`` with the partial token-set scorer.

    Returns 0 when ``is_none`` reports either input as missing. If ``scorer``
    is neither ``None`` nor ``"indel"`` the call is forwarded, with the
    caller's original arguments, to ``fuzz_py.partial_token_set_ratio``.
    Otherwise the inputs go through ``preprocess_strings`` (using
    ``processor``) and are scored by ``partial_token_set_ratio_func``; a
    ``score_cutoff`` of ``None`` is passed on as 0.0.
    """
    cdef RF_StringWrapper lhs, rhs
    cdef double cutoff = 0.0
    # Convert the cutoff up front so a bad value fails before any other work.
    if score_cutoff is not None:
        cutoff = score_cutoff

    setupPandas()
    if is_none(s1) or is_none(s2):
        return 0

    # Only the indel scorer is handled natively here; everything else is
    # delegated to the Python implementation.
    delegate_to_python = scorer is not None and scorer != "indel"
    if delegate_to_python:
        return fuzz_py.partial_token_set_ratio(
            s1,
            s2,
            processor=processor,
            score_cutoff=score_cutoff,
            scorer=scorer,
        )

    preprocess_strings(s1, s2, processor, &lhs, &rhs)
    return partial_token_set_ratio_func(lhs.string, rhs.string, cutoff)


def partial_token_ratio(s1, s2, *, processor=None, score_cutoff=None):
def partial_token_ratio(s1, s2, *, processor=None, score_cutoff=None, scorer="indel"):
    """Score ``s1`` against ``s2`` with the partial token scorer.

    Returns 0 when ``is_none`` reports either input as missing. If ``scorer``
    is neither ``None`` nor ``"indel"`` the call is forwarded, with the
    caller's original arguments, to ``fuzz_py.partial_token_ratio``.
    Otherwise the inputs go through ``preprocess_strings`` (using
    ``processor``) and are scored by ``partial_token_ratio_func``; a
    ``score_cutoff`` of ``None`` is passed on as 0.0.
    """
    cdef RF_StringWrapper lhs, rhs
    cdef double cutoff = 0.0
    # Convert the cutoff up front so a bad value fails before any other work.
    if score_cutoff is not None:
        cutoff = score_cutoff

    setupPandas()
    if is_none(s1) or is_none(s2):
        return 0

    # Only the indel scorer is handled natively here; everything else is
    # delegated to the Python implementation.
    delegate_to_python = scorer is not None and scorer != "indel"
    if delegate_to_python:
        return fuzz_py.partial_token_ratio(
            s1,
            s2,
            processor=processor,
            score_cutoff=score_cutoff,
            scorer=scorer,
        )

    preprocess_strings(s1, s2, processor, &lhs, &rhs)
    return partial_token_ratio_func(lhs.string, rhs.string, cutoff)


def WRatio(s1, s2, *, processor=None, score_cutoff=None):
def WRatio(s1, s2, *, processor=None, score_cutoff=None, scorer="indel"):
    """Score ``s1`` against ``s2`` with the WRatio scorer.

    Returns 0 when ``is_none`` reports either input as missing. If ``scorer``
    is neither ``None`` nor ``"indel"`` the call is forwarded, with the
    caller's original arguments, to ``fuzz_py.WRatio``. Otherwise the inputs
    go through ``preprocess_strings`` (using ``processor``) and are scored by
    ``WRatio_func``; a ``score_cutoff`` of ``None`` is passed on as 0.0.
    """
    cdef RF_StringWrapper lhs, rhs
    cdef double cutoff = 0.0
    # Convert the cutoff up front so a bad value fails before any other work.
    if score_cutoff is not None:
        cutoff = score_cutoff

    setupPandas()
    if is_none(s1) or is_none(s2):
        return 0

    # Only the indel scorer is handled natively here; everything else is
    # delegated to the Python implementation.
    delegate_to_python = scorer is not None and scorer != "indel"
    if delegate_to_python:
        return fuzz_py.WRatio(
            s1,
            s2,
            processor=processor,
            score_cutoff=score_cutoff,
            scorer=scorer,
        )

    preprocess_strings(s1, s2, processor, &lhs, &rhs)
    return WRatio_func(lhs.string, rhs.string, cutoff)

Expand Down
Loading