Skip to content

Commit

Permalink
Merge pull request DerwenAI#219 from tomaarsen/hotfix/topicrank_no_candidates
Browse files Browse the repository at this point in the history

Prevent exception on TopicRank when there are no noun_chunks
  • Loading branch information
ceteri authored Jul 25, 2022
2 parents 9ab6450 + 2ffc004 commit 7b12b34
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 9 deletions.
3 changes: 3 additions & 0 deletions pytextrank/topicrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ def _cluster (
returns:
list of clusters of candidates.
"""
if not candidates:
return []

bag_of_words = list(
{
word.text
Expand Down
12 changes: 6 additions & 6 deletions tests/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,13 @@ def test_summary (long_doc: Doc):
LIMIT_PHRASES = 10
TOP_K = 5

# expected for spacy==3.2.1 and en-core-web-sm==3.2.0
# expected for spacy==3.3.1 and en-core-web-sm==3.3.0
expected_trace = [
[0, {0, 1, 6, 8, 9}],
[1, {9}],
[2, {1}],
[3, {7}],
[6, {9, 4}],
[0, {8, 1, 3, 7}],
[2, {3}],
[4, {9}],
[6, {5}],
[10, {0, 2, 3}]
]

tr = long_doc._.textrank
Expand Down
22 changes: 19 additions & 3 deletions tests/test_topicrank.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ def test_summary (nlp: Language):
TOP_K = 5

expected_trace = [
[0, [2, 7]],
[0, [2, 6]],
[1, [0, 1]],
[2, [2]],
[3, [8, 7]],
[2, [0, 2]],
[3, [8, 6]],
[4, [5]]
]

Expand Down Expand Up @@ -226,3 +226,19 @@ def scrubber_func(text_span: Span) -> str:
# we expect the "test" to be filtered away (due to stopwords),
# and "scrubber" to be replaced with "modified scrubber"
assert set(phrases) == {"modified scrubber"}

def test_empty_candidates ():
    """
    An empty document passes through the "topicrank" pipeline component
    without raising, and yields no phrases.
    """
    # given: the small English model with "topicrank" appended as the
    # final pipeline component
    pipeline = spacy.load("en_core_web_sm")
    pipeline.add_pipe("topicrank", last=True)

    # when: the pipeline processes an empty string
    empty_doc = pipeline("")

    # then: no phrases are extracted
    assert len(empty_doc._.phrases) == 0

0 comments on commit 7b12b34

Please sign in to comment.