fix/update_solver_kwargs (#46)

* fix/update_solver_kwargs: context has been dropped in favor of an explicit lang argument * fix/update_solver_kwargs: context has been dropped in favor of an explicit lang argument * docstrings
OpenVoiceOS · Aug 4, 2024 · c3d9e22 · c3d9e22
1 parent 91014b4
commit c3d9e22
Show file tree

Hide file tree

Showing 2 changed files with 102 additions and 55 deletions.
diff --git a/ovos_classifiers/opm/heuristics.py b/ovos_classifiers/opm/heuristics.py
@@ -87,55 +87,64 @@ def transform(self, utterances: List[str],
 
 
 class HeuristicSummarizerPlugin(TldrSolver):
-    """heuristic summarizer, picks best sentences based on word frequencies"""
+    """Heuristic summarizer that picks the best sentences based on word frequencies."""
 
-    def get_tldr(self, document, context=None):
-        context = context or {}
-        lang = context.get("lang") or "en"
+    def get_tldr(self, document: str, lang: Optional[str] = None) -> str:
+        """
+        Summarizes the given document using word frequencies.
+
+        Args:
+            document (str): The document to summarize.
+            lang (Optional[str]): The language of the document. Defaults to "en".
+
+        Returns:
+            str: The summarized text.
+        """
+        lang = lang or "en"
         return WordFrequencySummarizer().summarize(document, lang)
 
 
 class BM25MultipleChoiceSolver(MultipleChoiceSolver):
-    """select best answer to a question from a list of options """
+    """Selects the best answer to a question from a list of options using the BM25 algorithm."""
 
-    # plugin methods to override
-    def rerank(self, query: str, options: List[str],
-               context: Optional[dict] = None) -> List[Tuple[float, str]]:
+    def rerank(self, query: str, options: List[str], lang: Optional[str] = None) -> List[Tuple[float, str]]:
         """
-        rank options list, returning a list of tuples (score, text)
+        Ranks the options list, returning a list of tuples (score, text).
+
+        Args:
+            query (str): The query string.
+            options (List[str]): The list of options to rank.
+            lang (Optional[str]): The language of the query and options. Defaults to None.
+
+        Returns:
+            List[Tuple[float, str]]: A list of tuples containing the score and the option text.
         """
         from ovos_classifiers.heuristics.machine_comprehension import rank_answers
-        context = context or {}
-        try:
-            lang = context.get("lang")
-            stopwords = get_stopwords(lang)
-        except: # in case nltk is not available or stopwords dataset download fails for any reason
-            stopwords = []
-        return sorted([(s, a) for a, s in rank_answers(query, options, stopwords).items()],
-                      key=lambda k: k[0], reverse=True)
-
-    def select_answer(self, query, options, context=None):
-        """
-        query and options assured to be in self.default_lang
-        return best answer from options list
-        """
-        context = context or {}
-        try:
-            from ovos_classifiers.heuristics.machine_comprehension import get_best_answer
-            lang = context.get("lang")
-            stopwords = get_stopwords(lang)
-        except: # in case nltk is not available or stopwords dataset download fails for any reason
-            stopwords = []
-        return get_best_answer(query, options, stopwords)
+        stopwords = []
+        if lang:
+            try:
+                stopwords = get_stopwords(lang)
+            except Exception:  # In case nltk is not available or stopwords dataset download fails for any reason
+                pass
+
+        ranked_answers = rank_answers(query, options, stopwords)
+        return sorted([(s, a) for a, s in ranked_answers.items()], key=lambda k: k[0], reverse=True)
 
 
 class BM25SolverPlugin(EvidenceSolver):
-    """extract best sentence from text that answers the question, using BM25 algorithm"""
+    """Extracts the best sentence from text that answers the question using the BM25 algorithm."""
 
-    def get_best_passage(self, evidence, question, context=None):
+    def get_best_passage(self, evidence: str, question: str, lang: Optional[str] = None) -> str:
         """
-        evidence and question assured to be in self.default_lang
-         returns summary of provided document
+        Extracts the best passage from the evidence that answers the question.
+
+        Args:
+            evidence (str): The evidence text to search within.
+            question (str): The question to answer.
+            lang (Optional[str]): The language of the evidence and question. Defaults to None.
+
+        Returns:
+            str: The best passage that answers the question.
         """
         bm25 = BM25()
 
@@ -145,9 +154,8 @@ def get_best_passage(self, evidence, question, context=None):
         corpus = [word_tokenize(s) for s in sents]
         bm25.fit(corpus)
         scores = bm25.search(word_tokenize(question))
-        ans = max([s for s in zip(scores, corpus)],
-                  key=lambda k: k[0])[1]
-        return " ".join(ans)
+        best_sentence = max(zip(scores, corpus), key=lambda k: k[0])[1]
+        return " ".join(best_sentence)
 
 
 class HeuristicKeywordExtractorPlugin(KeywordExtractor):

diff --git a/ovos_classifiers/opm/nltk.py b/ovos_classifiers/opm/nltk.py
@@ -1,12 +1,13 @@
 # these plugins require nltk and may download external data/models at runtime
 import random
+from typing import Optional, Tuple, Dict
 
 from nltk import pos_tag as _pt
 from nltk.corpus import wordnet as wn
+from ovos_plugin_manager.templates.keywords import KeywordExtractor
 from ovos_plugin_manager.templates.language import LanguageDetector
 from ovos_plugin_manager.templates.postag import PosTagger
 from ovos_plugin_manager.templates.solvers import QuestionSolver
-from ovos_plugin_manager.templates.keywords import KeywordExtractor
 from quebra_frases import span_indexed_word_tokenize
 
 from ovos_classifiers.datasets.wordnet import Wordnet
@@ -15,16 +16,32 @@
 
 
 class WordnetSolverPlugin(QuestionSolver):
-    """ question answerer that uses wordnet for definitions synonyms and antonyms"""
+    """A question answerer that uses WordNet for definitions, synonyms, and antonyms."""
     enable_tx = True
     priority = 80
 
-    def __init__(self, config=None):
+    def __init__(self, config: Optional[Dict] = None):
+        """
+        Initializes the WordnetSolverPlugin with a given configuration.
+
+        Args:
+            config (Optional[Dict]): Configuration dictionary. Defaults to None.
+        """
         config = config or {}
-        config["lang"] = "en"  # only english supported
+        config["lang"] = "en"  # Only English supported
         super().__init__(config)
 
-    def get_data_key(self, query, lang="en"):
+    def get_data_key(self, query: str, lang: str = "en") -> Tuple[Optional[str], str]:
+        """
+        Determines the type of data (definition, synonyms, antonyms) requested by the query.
+
+        Args:
+            query (str): The query string.
+            lang (str): The language of the query. Defaults to "en".
+
+        Returns:
+            Tuple[Optional[str], str]: A tuple containing the data type key and the processed query.
+        """
         query = HeuristicExtractor.extract_subject(query, lang) or query
 
         # TODO localization
@@ -53,29 +70,51 @@ def get_data_key(self, query, lang="en"):
         return None, query
 
     # officially exported Solver methods
-    def get_data(self, query, context=None):
-        pos = wn.NOUN  # TODO check context for postag
+    def get_data(self, query: str, lang: Optional[str] = None) -> Dict[str, str]:
+        """
+        Retrieves WordNet data for the given query.
+
+        Args:
+            query (str): The query string.
+            lang (Optional[str]): The language of the query. Defaults to None.
+
+        Returns:
+            Dict[str, str]: A dictionary containing WordNet data such as lemmas, antonyms, definitions, etc.
+        """
+        pos = wn.NOUN  # TODO: Check context for part of speech
         synsets = wn.synsets(query, pos=pos)
         if not len(synsets):
             return {}
         synset = synsets[0]
-        res = {"lemmas": Wordnet.get_lemmas(query, pos=pos, synset=synset),
-               "antonyms": Wordnet.get_antonyms(query, pos=pos, synset=synset),
-               "holonyms": Wordnet.get_holonyms(query, pos=pos, synset=synset),
-               "hyponyms": Wordnet.get_hyponyms(query, pos=pos, synset=synset),
-               "hypernyms": Wordnet.get_hypernyms(query, pos=pos, synset=synset),
-               "root_hypernyms": Wordnet.get_root_hypernyms(query, pos=pos, synset=synset),
-               "definition": Wordnet.get_definition(query, pos=pos, synset=synset)}
+        res = {
+            "lemmas": Wordnet.get_lemmas(query, pos=pos, synset=synset),
+            "antonyms": Wordnet.get_antonyms(query, pos=pos, synset=synset),
+            "holonyms": Wordnet.get_holonyms(query, pos=pos, synset=synset),
+            "hyponyms": Wordnet.get_hyponyms(query, pos=pos, synset=synset),
+            "hypernyms": Wordnet.get_hypernyms(query, pos=pos, synset=synset),
+            "root_hypernyms": Wordnet.get_root_hypernyms(query, pos=pos, synset=synset),
+            "definition": Wordnet.get_definition(query, pos=pos, synset=synset)
+        }
         return res
 
-    def get_spoken_answer(self, query, context=None):
-        lang = context.get("lang") or self.default_lang
+    def get_spoken_answer(self, query: str, lang: Optional[str] = None) -> Optional[str]:
+        """
+        Generates a spoken answer for the given query.
+
+        Args:
+            query (str): The query string.
+            lang (Optional[str]): The language of the query. Defaults to None.
+
+        Returns:
+            Optional[str]: The spoken answer, if available.
+        """
+        lang = lang or self.default_lang
         lang = lang.split("-")[0]
         # extract the best keyword with some regexes or fallback to RAKE
         k, query = self.get_data_key(query, lang)
         if not query:
             query = HeuristicExtractor.extract_subject(query, lang) or query
-        data = self.search(query, context)
+        data = self.search(query, lang=lang)
         if k and k in data:
             v = data[k]
             if k in ["lemmas", "antonyms"] and len(v):