|
1 | 1 | from .text import Analyzer |
2 | 2 | from .scorers import ARI, ColemanLiau, DaleChall, Flesch, \ |
3 | 3 | FleschKincaid, GunningFog, LinsearWrite, Smog, Spache |
4 | | - |
| 4 | +import warnings |
5 | 5 |
|
6 | 6 | class Readability: |
def __init__(self, text, min_words=100):
    """Analyze `text` once and store the results for the scorer methods.

    Args:
        text: Document to analyze; handed unchanged to `Analyzer.analyze`.
        min_words: Threshold forwarded to most scorers (see the individual
            methods). Defaults to 100; a value below 100 emits a warning.
    """
    self._analyzer = Analyzer()
    self._statistics = self._analyzer.analyze(text)
    self._min_words = min_words
    # The check is on the configured threshold, not on the measured length
    # of `text`: the warning fires whenever the caller lowers `min_words`
    # below 100, regardless of how long the document actually is.
    if self._min_words < 100:
        warnings.warn(
            "Documents with fewer than 100 words may affect the accuracy of readability tests",
            # Attribute the warning to the code constructing Readability,
            # not to this line inside the library.
            stacklevel=2,
        )
10 | 15 |
|
def ari(self):
    """Return the Automated Readability Index (ARI) result for the analyzed text.

    Builds an `ARI` scorer from the stored statistics and the `min_words`
    threshold given at construction, then returns its `score()`.
    """
    scorer = ARI(self._statistics, self._min_words)
    return scorer.score()
14 | 19 |
|
def coleman_liau(self):
    """Return the Coleman Liau Index result for the analyzed text.

    Builds a `ColemanLiau` scorer from the stored statistics and the
    `min_words` threshold given at construction, then returns its `score()`.
    """
    scorer = ColemanLiau(self._statistics, self._min_words)
    return scorer.score()
18 | 23 |
|
def dale_chall(self):
    """Return the Dale Chall result for the analyzed text.

    Builds a `DaleChall` scorer from the stored statistics and the
    `min_words` threshold given at construction, then returns its `score()`.
    """
    scorer = DaleChall(self._statistics, self._min_words)
    return scorer.score()
22 | 27 |
|
def flesch(self):
    """Return the Flesch Reading Ease result for the analyzed text.

    Builds a `Flesch` scorer from the stored statistics and the
    `min_words` threshold given at construction, then returns its `score()`.
    """
    scorer = Flesch(self._statistics, self._min_words)
    return scorer.score()
26 | 31 |
|
def flesch_kincaid(self):
    """Return the Flesch-Kincaid Grade Level result for the analyzed text.

    Builds a `FleschKincaid` scorer from the stored statistics and the
    `min_words` threshold given at construction, then returns its `score()`.
    """
    scorer = FleschKincaid(self._statistics, self._min_words)
    return scorer.score()
30 | 35 |
|
def gunning_fog(self):
    """Return the Gunning Fog result for the analyzed text.

    Builds a `GunningFog` scorer from the stored statistics and the
    `min_words` threshold given at construction, then returns its `score()`.
    """
    scorer = GunningFog(self._statistics, self._min_words)
    return scorer.score()
34 | 39 |
|
def linsear_write(self):
    """Return the Linsear Write result for the analyzed text.

    Builds a `LinsearWrite` scorer from the stored statistics and the
    `min_words` threshold given at construction, then returns its `score()`.
    """
    scorer = LinsearWrite(self._statistics, self._min_words)
    return scorer.score()
38 | 43 |
|
def smog(self, all_sentences=False, ignore_length=False):
    """Return the SMOG Index result for the analyzed text.

    Args:
        all_sentences: Indicates whether SMOG should use a sample of 30
            sentences, as described in the original paper, or all
            sentences in the text.
        ignore_length: Forwarded unchanged to the `Smog` scorer.
            NOTE(review): presumably relaxes the scorer's length
            requirement -- confirm against `Smog`.

    Unlike the other scorer methods, this one does not pass the stored
    `min_words` threshold to its scorer.
    """
    scorer = Smog(
        self._statistics,
        self._analyzer.sentences,
        all_sentences=all_sentences,
        ignore_length=ignore_length,
    )
    return scorer.score()
43 | 49 |
|
def spache(self):
    """Return the Spache Index result for the analyzed text.

    Builds a `Spache` scorer from the stored statistics and the
    `min_words` threshold given at construction, then returns its `score()`.
    """
    scorer = Spache(self._statistics, self._min_words)
    return scorer.score()
47 | 53 |
|
48 | 54 | def statistics(self): |
49 | 55 | return { |
|
0 commit comments