Merge pull request cdimascio#33 from EpsIotaPi/master

cdimascio · web-flow · commit 2f5331093b88 · 2024-01-27T13:23:26.000-05:00
diff --git a/readability/readability.py b/readability/readability.py
@@ -1,49 +1,55 @@
 from .text import Analyzer
 from .scorers import ARI, ColemanLiau, DaleChall, Flesch, \
     FleschKincaid, GunningFog, LinsearWrite, Smog, Spache
-
+import warnings
 
 class Readability:
-    def __init__(self, text):
+    def __init__(self, text, min_words=100):
         self._analyzer = Analyzer()
         self._statistics = self._analyzer.analyze(text)
+        self._min_words = min_words
+        if self._min_words < 100:
+            warnings.warn(
+                "Documents with fewer than 100 words may affect the accuracy of readability tests"
+            )
 
     def ari(self):
         """Calculate Automated Readability Index (ARI)."""
-        return ARI(self._statistics).score()
+        return ARI(self._statistics, self._min_words).score()
 
     def coleman_liau(self):
         """Calculate Coleman Liau Index."""
-        return ColemanLiau(self._statistics).score()
+        return ColemanLiau(self._statistics, self._min_words).score()
 
     def dale_chall(self):
         """Calculate Dale Chall."""
-        return DaleChall(self._statistics).score()
+        return DaleChall(self._statistics, self._min_words).score()
 
     def flesch(self):
         """Calculate Flesch Reading Ease score."""
-        return Flesch(self._statistics).score()
+        return Flesch(self._statistics, self._min_words).score()
 
     def flesch_kincaid(self):
         """Calculate Flesch-Kincaid Grade Level."""
-        return FleschKincaid(self._statistics).score()
+        return FleschKincaid(self._statistics, self._min_words).score()
 
     def gunning_fog(self):
         """Calculate Gunning Fog score."""
-        return GunningFog(self._statistics).score()
+        return GunningFog(self._statistics, self._min_words).score()
 
     def linsear_write(self):
         """Calculate Linsear Write."""
-        return LinsearWrite(self._statistics).score()
+        return LinsearWrite(self._statistics, self._min_words).score()
 
-    def smog(self,all_sentences=False):
+    def smog(self,all_sentences=False, ignore_length=False):
         """SMOG Index.
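-        `all_sentences` indicates whether SMOG should use a sample of 30 sentences, as described in the original paper, or if it should use all sentences in the text"""
+        `all_sentences` indicates whether SMOG should use a sample of 30 sentences, as described in the original paper, or whether it should use all sentences in the text. `ignore_length` indicates whether a text with fewer than 30 sentences should produce a warning instead of raising a ReadabilityException."""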
-        return Smog(self._statistics, self._analyzer.sentences,all_sentences=all_sentences).score()
+        return Smog(self._statistics, self._analyzer.sentences,
+                    all_sentences=all_sentences, ignore_length=ignore_length).score()
 
     def spache(self):
         """Spache Index."""
-        return Spache(self._statistics).score()
+        return Spache(self._statistics, self._min_words).score()
 
     def statistics(self):
         return {
diff --git a/readability/scorers/ari.py b/readability/scorers/ari.py
@@ -14,10 +14,10 @@ def __str__(self):
 
 
 class ARI:
-    def __init__(self, stats):
+    def __init__(self, stats, min_words=100):
         self._stats = stats
-        if stats.num_words < 100:
-            raise ReadabilityException('100 words required.')
+        if stats.num_words < min_words:
+            raise ReadabilityException('{} words required.'.format(min_words))
 
     def score(self):
         score = self._score()
diff --git a/readability/scorers/coleman_liau.py b/readability/scorers/coleman_liau.py
@@ -12,10 +12,10 @@ def __str__(self):
 
 
 class ColemanLiau:
-    def __init__(self, stats):
+    def __init__(self, stats, min_words=100):
         self._stats = stats
-        if stats.num_words < 100:
-            raise ReadabilityException('100 words required.')
+        if stats.num_words < min_words:
+            raise ReadabilityException('{} words required.'.format(min_words))
 
     def score(self):
         score = self._score()
diff --git a/readability/scorers/dale_chall.py b/readability/scorers/dale_chall.py
@@ -12,10 +12,10 @@ def __str__(self):
 
 
 class DaleChall:
-    def __init__(self, stats):
+    def __init__(self, stats, min_words=100):
         self._stats = stats
-        if stats.num_words < 100:
-            raise ReadabilityException('100 words required.')
+        if stats.num_words < min_words:
+            raise ReadabilityException('{} words required.'.format(min_words))
 
     def score(self):
         score = self._score()
diff --git a/readability/scorers/flesch.py b/readability/scorers/flesch.py
@@ -13,10 +13,10 @@ def __str__(self):
 
 
 class Flesch:
-    def __init__(self, stats):
+    def __init__(self, stats, min_words=100):
         self._stats = stats
-        if stats.num_words < 100:
-            raise ReadabilityException('100 words required.')
+        if stats.num_words < min_words:
+            raise ReadabilityException('{} words required.'.format(min_words))
 
     def score(self):
         score = self._score()
diff --git a/readability/scorers/flesch_kincaid.py b/readability/scorers/flesch_kincaid.py
@@ -12,10 +12,10 @@ def __str__(self):
 
 
 class FleschKincaid:
-    def __init__(self, stats):
+    def __init__(self, stats, min_words=100):
         self._stats = stats
-        if stats.num_words < 100:
-            raise ReadabilityException('100 words required.')
+        if stats.num_words < min_words:
+            raise ReadabilityException('{} words required.'.format(min_words))
 
     def score(self):
         score = self._score()
diff --git a/readability/scorers/gunning_fog.py b/readability/scorers/gunning_fog.py
@@ -12,10 +12,10 @@ def __str__(self):
 
 
 class GunningFog:
-    def __init__(self, stats):
+    def __init__(self, stats, min_words=100):
         self._stats = stats
-        if stats.num_words < 100:
-            raise ReadabilityException('100 words required.')
+        if stats.num_words < min_words:
+            raise ReadabilityException('{} words required.'.format(min_words))
 
     def score(self):
         score = self._score()
diff --git a/readability/scorers/linsear_write.py b/readability/scorers/linsear_write.py
@@ -12,10 +12,10 @@ def __str__(self):
 
 
 class LinsearWrite:
-    def __init__(self, stats):
+    def __init__(self, stats, min_words=100):
         self._stats = stats
-        if stats.num_words < 100:
-            raise ReadabilityException('100 words required.')
+        if stats.num_words < min_words:
+            raise ReadabilityException('{} words required.'.format(min_words))
 
     def score(self):
         score = self._score()
diff --git a/readability/scorers/smog.py b/readability/scorers/smog.py
@@ -1,7 +1,7 @@
 import math
 from readability.text.analyzer import Analyzer
 from readability.exceptions import ReadabilityException
-
+import warnings
 
 class Result:
     def __init__(self, score, grade_level):
@@ -14,16 +14,22 @@ def __str__(self):
 
 
 class Smog:
-    def __init__(self, stats, sentences, all_sentences=False):
+    def __init__(self, stats, sentences, all_sentences=False, ignore_length=False):
         """
         Computes the SMOG readability score (Harry McLaughlin, 1969 https://ogg.osu.edu/media/documents/health_lit/WRRSMOG_Readability_Formula_G._Harry_McLaughlin__1969_.pdf)
         If all_sentences is false, computes the score as described in McLaughlin, 1969, using exactly 30 sentences
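-        If all_sentences is true, adjusts the score to use all sentences in the text
+        If all_sentences is true, adjusts the score to use all sentences in the text. If ignore_length is true, a text with fewer than 30 sentences triggers a warning instead of a ReadabilityException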
         """
         if stats.num_sentences < 30:
-            raise ReadabilityException(
-                'SMOG requires 30 sentences. {} found'
-                .format(stats.num_sentences))
+            if not ignore_length:
+                raise ReadabilityException(
+                    'SMOG requires 30 sentences. {} found'
+                    .format(stats.num_sentences))
+            else:
+                warnings.warn(
+                    'SMOG requires 30 sentences. {} found'
+                    .format(stats.num_sentences))
+
 
         self._stats = stats
         self.all_sentences = all_sentences
diff --git a/readability/scorers/spache.py b/readability/scorers/spache.py
@@ -12,10 +12,10 @@ def __str__(self):
 
 
 class Spache:
-    def __init__(self, stats):
+    def __init__(self, stats, min_words=100):
         self._stats = stats
-        if stats.num_words < 100:
-            raise ReadabilityException('100 words required.')
+        if stats.num_words < min_words:
+            raise ReadabilityException('{} words required.'.format(min_words))
 
     def score(self):
         score = self._score()