Merge pull request cdimascio#11 from rbamos/master

cdimascio · web-flow · commit 8905ceeaa05e · 2020-04-19T10:22:13.000-04:00
Added an `all_sentences` option so SMOG can be scored over every sentence in the text
diff --git a/readability/readability.py b/readability/readability.py
@@ -36,9 +36,10 @@ def linsear_write(self):
         """Calculate Linsear Write."""
         return LinsearWrite(self._statistics).score()
 
-    def smog(self):
-        """SMOG Index."""
-        return Smog(self._statistics, self._analyzer.sentences).score()
+    def smog(self,all_sentences=False):
+        """SMOG Index.
+        `all_sentences` selects the input: if False (the default), SMOG uses a sample of 30 sentences, as described in the original paper; if True, it uses all sentences in the text."""
+        return Smog(self._statistics, self._analyzer.sentences,all_sentences=all_sentences).score()
 
     def spache(self):
         """Spache Index."""
diff --git a/readability/scorers/smog.py b/readability/scorers/smog.py
@@ -14,14 +14,22 @@ def __str__(self):
 
 
 class Smog:
-    def __init__(self, stats, sentences):
+    def __init__(self, stats, sentences, all_sentences=False):
+        """
+        Computes the SMOG readability score (G. Harry McLaughlin, 1969: https://ogg.osu.edu/media/documents/health_lit/WRRSMOG_Readability_Formula_G._Harry_McLaughlin__1969_.pdf).
+        If all_sentences is False, computes the score as described in McLaughlin (1969), using exactly 30 sentences.
+        If all_sentences is True, scales the polysyllable count by the total number of sentences in the text; at least 30 sentences are required in either mode.
+        """
         if stats.num_sentences < 30:
             raise ReadabilityException(
                 'SMOG requires 30 sentences. {} found'
                 .format(stats.num_sentences))
 
         self._stats = stats
-        self._smog_stats = self._smog_text_stats(sentences)
+        self.all_sentences = all_sentences
+        if not self.all_sentences:
+            self._smog_stats = self._smog_text_stats(sentences)
+            
 
     def score(self):
         score = self._score()
@@ -32,9 +40,14 @@ def score(self):
         )
 
     def _score(self):
-        smog_stats = self._smog_stats
+        if self.all_sentences:
+            smog_stats = self._stats
+            num_sentences = smog_stats.num_sentences
+        else:
+            smog_stats = self._smog_stats
+            num_sentences = 30
+        
         num_complex_words = smog_stats.num_poly_syllable_words
-        num_sentences = 30
         return 1.0430 * math.sqrt(30 * num_complex_words / num_sentences) + 3.1291
 
     def _grade_level(self, score):
diff --git a/test/test_readability.py b/test/test_readability.py
@@ -62,11 +62,21 @@ def test_smog(self):
         text = ' '.join(text for i in range(0, 5))
 
         readability = Readability(text)
-        r = readability.smog()
 
-        print(r)
-        self.assertEqual(12.516099999999998, r.score)
-        self.assertEqual('13', r.grade_level)
+        # Test SMOG with the default 30-sentence sample
+        r1 = readability.smog()
+        
+        # Test SMOG using all sentences in the text
+        r2 = readability.smog(all_sentences=True)
+
+
+        print("all_sentences=False: %s ; all_sentences=True: %s" % (r1,r2))
+        self.assertEqual(12.516099999999998, r1.score)
+        self.assertEqual('13', r1.grade_level)
+
+        self.assertEqual(12.785403640627713, r2.score)
+        self.assertEqual('13', r2.grade_level)
+
 
     def test_spache(self):
         r = self.readability.spache()