Skip to content

Commit 9ba91d8

Browse files
committed
add simpler license matcher
1 parent 2734288 commit 9ba91d8

File tree

1 file changed

+22
-5
lines changed

1 file changed

+22
-5
lines changed

grayskull/license/discovery.py

+22-5
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,18 @@ def get_all_licenses_from_spdx() -> List:
6262
]
6363

6464

65+
def _match_scrambled_exact(candidate, licenses) -> str | None:
66+
"""
67+
Return license with rearranged word order only.
68+
69+
Fancy scorer confuses BSD-3-Clause with DEC-3-Clause.
70+
"""
71+
bag = set(re.findall(r"\w+", candidate.lower()))
72+
for license in licenses:
73+
if bag == set(re.findall(r"\w+", license.lower())):
74+
return license
75+
76+
6577
def match_license(name: str) -> dict:
6678
"""Match if the given license name matches any license present on
6779
spdx.org
@@ -75,11 +87,16 @@ def match_license(name: str) -> dict:
7587
name = re.sub(r"\s+license\s*", "", name.strip(), flags=re.IGNORECASE)
7688
name = name.strip()
7789

78-
best_matches = process.extract(
79-
name, _get_all_license_choice(all_licenses), scorer=partial_ratio
80-
)
81-
best_matches = process.extract(name, [lc for lc, *_ in best_matches])
82-
spdx_license = best_matches[0]
90+
exact_match = _match_scrambled_exact(name, _get_all_license_choice(all_licenses))
91+
if exact_match:
92+
best_matches = [(exact_match, 100, 0)]
93+
spdx_license = best_matches[0]
94+
else:
95+
best_matches = process.extract(
96+
name, _get_all_license_choice(all_licenses), scorer=partial_ratio
97+
)
98+
best_matches = process.extract(name, [lc for lc, *_ in best_matches])
99+
spdx_license = best_matches[0]
83100

84101
if spdx_license[1] < 100:
85102
# Prefer "-or-later" licenses over the "-only"

0 commit comments

Comments
 (0)