Merge branch 'master' of https://github.com/ETS-Next-Gen/writing_observer

bradley-erickson · bradley-erickson · commit 3bda6dae186a · 2023-02-15T18:53:22.000-05:00
diff --git a/.github/workflows/pycodestyle.yml b/.github/workflows/pycodestyle.yml
@@ -20,4 +20,4 @@ jobs:
         pip install pycodestyle
     - name: Analysing the code with pycodestyle
       run: |
-        pycodestyle --ignore=E501,W503 $(git ls-files 'learning_observer/*.py' 'modules/*.py')
+        pycodestyle --ignore=E501,W503,E731 $(git ls-files 'learning_observer/*.py' 'modules/*.py')
diff --git a/modules/writing_observer/writing_observer/aggregator.py b/modules/writing_observer/writing_observer/aggregator.py
@@ -2,14 +2,15 @@
 import time
 
 import learning_observer.settings
+import learning_observer.stream_analytics.helpers
 import learning_observer.util
 
 
 def excerpt_active_text(
     text, cursor_position,
-    desired_length=103, cursor_target=2/3, max_overflow=10,
-    cursor_character = "❙"
-    ):
+    desired_length=103, cursor_target=2 / 3, max_overflow=10,
+    cursor_character="❙"
+):
     '''
     This function returns a short segment of student text, cutting in a
     sensible way around word boundaries. This can be used for real-time
@@ -122,8 +123,6 @@ def aggregate_course_summary_stats(student_data):
 #
 ######
 
-import learning_observer.stream_analytics.helpers
-
 
 async def get_latest_student_documents(student_data):
     '''
diff --git a/modules/writing_observer/writing_observer/awe_nlp.py b/modules/writing_observer/writing_observer/awe_nlp.py
@@ -30,6 +30,7 @@
 
 RUN_MODES = enum.Enum('RUN_MODES', 'MULTIPROCESSING SERIAL')
 
+
 def init_nlp():
     '''
     Initialize the spacy pipeline with the AWE components. This takes a while
@@ -51,6 +52,7 @@ def init_nlp():
     nlp.add_pipe('contentsegmentation')
     return nlp
 
+
 nlp = init_nlp()
 
 
@@ -64,39 +66,37 @@ def outputIndicator(doc, indicatorName, itype, stype=None, text=None, added_filt
     indicator = {}
 
     if added_filter is None:
-        theFilter = [(indicatorName,[True]),('is_alpha',[True])]
+        theFilter = [(indicatorName, [True]), ('is_alpha', [True])]
     else:
         theFilter = added_filter
-        theFilter.append(('is_alpha',[True]))
+        theFilter.append(('is_alpha', [True]))
 
     indicator['metric'] =\
         doc._.AWE_Info(infoType=itype,
-                        indicator=indicatorName,
-                        filters=theFilter,
-                        summaryType=stype)  
-    
+                       indicator=indicatorName,
+                       filters=theFilter,
+                       summaryType=stype)
+
     data = json.loads(
         doc._.AWE_Info(infoType=itype,
-                        indicator=indicatorName,
-                        filters=theFilter)).values()
+                       indicator=indicatorName,
+                       filters=theFilter)).values()
 
     indicator['offsets'] = \
-        [[entry['offset'],entry['length']] \
-         for entry \
-         in data]
+        [[entry['offset'], entry['length']] for entry in data]
 
     if itype == 'Token':
         indicator['text'] = \
             json.loads(doc._.AWE_Info(infoType=itype,
-                   indicator=indicatorName, 
-                   filters=theFilter,
-                   transformations=['lemma'],
-                   summaryType='uniq'))
+                                      indicator=indicatorName,
+                                      filters=theFilter,
+                                      transformations=['lemma'],
+                                      summaryType='uniq'))
     else:
         indicator['text'] = []
 
         for span in indicator['offsets']:
-            indicator['text'].append(text[int(span[0]):int(span[0])+int(span[1])])
+            indicator['text'].append(text[int(span[0]):int(span[0]) + int(span[1])])
 
     return indicator
 
@@ -148,6 +148,7 @@ async def process_texts_serial(texts, options=None):
 
 executor = None
 
+
 def run_in_fork(func):
     '''
     This will run a function in a forked subproces, for isolation.
@@ -191,7 +192,7 @@ async def process_texts_parallel(texts, options=None):
         try:
             annotations = await result_future
             annotations['text'] = text
-        except: # awe_components.errors.AWE_Workbench_Error and nltk.corpus.reader.wordnet.WordNetError
+        except Exception:
             raise
             annotations = "Error"
         annotated.append(annotations)
@@ -250,7 +251,7 @@ async def process_texts(writing_data, options=None, mode=RUN_MODES.MULTIPROCESSI
     return results
 
 
-if  __name__ == '__main__':
+if __name__ == '__main__':
     import time
     import writing_observer.sample_essays
     # Run over a sample text
@@ -270,9 +271,9 @@ async def process_texts(writing_data, options=None, mode=RUN_MODES.MULTIPROCESSI
     results3 = asyncio.run(process_texts_serial(example_texts[0:8]))
     t4 = time.time()
     print(results2)
-    print("Single time", t2-t1)
-    print("Parallel time", t3-t2)
-    print("Serial time", t4-t3)
+    print("Single time", t2 - t1)
+    print("Parallel time", t3 - t2)
+    print("Serial time", t4 - t3)
     print("Note that these results are imperfect -- ")
-    print("Errors", len([r for r in results2 if r=="Error"]))
-    print("Errors", [r if r=="Error" else "--" for r in results2])
+    print("Errors", len([r for r in results2 if r == "Error"]))
+    print("Errors", [r if r == "Error" else "--" for r in results2])
diff --git a/modules/writing_observer/writing_observer/nlp_indicators.py b/modules/writing_observer/writing_observer/nlp_indicators.py
@@ -3,7 +3,7 @@
 # Define a set of indicators with the kind of filtering/summariation we want
 #
 # Academic Language, Latinate Words, Low Frequency Words, Adjectives, Adverbs,
-#    Sentences, Paragraphs -- 
+#    Sentences, Paragraphs --
 #    just need to have lexicalfeatures in the pipeline to run.
 #
 # Transition Words, Ordinal Transition Words --
@@ -35,76 +35,76 @@
     ('Statements of Opinion', 'Doc', 'vwp_statements_of_opinion', None, 'percent'),
     ('Statements of Fact', 'Doc', 'vwp_statements_of_fact', None, 'percent'),
     # Transitions
-    # eventually we want to exclude \n\n as transitions using `[('!=',['introductory'])]`
+    # eventually we want to exclude \n\n as transitions using `[('!=', ['introductory'])]`
     # however the introductory category also includes "let us" and "let's"
     # no highlighting is shown on the new lines, so we won't remove it for now.
     ('Transition Words', 'Doc', 'transitions', None, 'counts'),
     #
-    ('Positive Transition Words', 'Doc', 'transitions',[('==',['positive'])], 'total'),
-    ('Conditional Transition Words', 'Doc', 'transitions',[('==',['conditional'])], 'total'),
-    ('Consequential Transition Words', 'Doc', 'transitions',[('==',['consequential'])], 'total'),
-    ('Contrastive Transition Words', 'Doc', 'transitions',[('==',['contrastive'])], 'total'),
-    ('Counterpoint Transition Words', 'Doc', 'transitions',[('==',['counterpoint'])], 'total'),
-    ('Comparative Transition Words', 'Doc', 'transitions',[('==',['comparative'])], 'total'),
-    ('Cross Referential Transition Words', 'Doc', 'transitions',[('==',['crossreferential'])], 'total'),
-    ('Illustrative Transition Words', 'Doc', 'transitions',[('==',['illustrative'])], 'total'),
-    ('Negative Transition Words', 'Doc', 'transitions',[('==',['negative'])], 'total'),
-    ('Emphatic Transition Words', 'Doc', 'transitions',[('==',['emphatic'])], 'total'),
-    ('Evenidentiary Transition Words', 'Doc', 'transitions',[('==',['evidentiary'])], 'total'),
-    ('General Transition Words', 'Doc', 'transitions',[('==',['general'])], 'total'),
-    ('Ordinal Transition Words', 'Doc', 'transitions',[('==',['ordinal'])], 'total'),
-    ('Purposive Transition Words', 'Doc', 'transitions',[('==',['purposive'])], 'total'),
-    ('Periphrastic Transition Words', 'Doc', 'transitions',[('==',['periphrastic'])], 'total'),
-    ('Hypothetical Transition Words', 'Doc', 'transitions',[('==',['hypothetical'])], 'total'),
-    ('Summative Transition Words', 'Doc', 'transitions',[('==',['summative'])], 'total'),
-    ('Introductory Transition Words', 'Doc', 'transitions',[('==',['introductory'])], 'total'),
+    ('Positive Transition Words', 'Doc', 'transitions', [('==', ['positive'])], 'total'),
+    ('Conditional Transition Words', 'Doc', 'transitions', [('==', ['conditional'])], 'total'),
+    ('Consequential Transition Words', 'Doc', 'transitions', [('==', ['consequential'])], 'total'),
+    ('Contrastive Transition Words', 'Doc', 'transitions', [('==', ['contrastive'])], 'total'),
+    ('Counterpoint Transition Words', 'Doc', 'transitions', [('==', ['counterpoint'])], 'total'),
+    ('Comparative Transition Words', 'Doc', 'transitions', [('==', ['comparative'])], 'total'),
+    ('Cross Referential Transition Words', 'Doc', 'transitions', [('==', ['crossreferential'])], 'total'),
+    ('Illustrative Transition Words', 'Doc', 'transitions', [('==', ['illustrative'])], 'total'),
+    ('Negative Transition Words', 'Doc', 'transitions', [('==', ['negative'])], 'total'),
+    ('Emphatic Transition Words', 'Doc', 'transitions', [('==', ['emphatic'])], 'total'),
+    ('Evenidentiary Transition Words', 'Doc', 'transitions', [('==', ['evidentiary'])], 'total'),
+    ('General Transition Words', 'Doc', 'transitions', [('==', ['general'])], 'total'),
+    ('Ordinal Transition Words', 'Doc', 'transitions', [('==', ['ordinal'])], 'total'),
+    ('Purposive Transition Words', 'Doc', 'transitions', [('==', ['purposive'])], 'total'),
+    ('Periphrastic Transition Words', 'Doc', 'transitions', [('==', ['periphrastic'])], 'total'),
+    ('Hypothetical Transition Words', 'Doc', 'transitions', [('==', ['hypothetical'])], 'total'),
+    ('Summative Transition Words', 'Doc', 'transitions', [('==', ['summative'])], 'total'),
+    ('Introductory Transition Words', 'Doc', 'transitions', [('==', ['introductory'])], 'total'),
     # pos_
-    ('Adjectives', 'Token', 'pos_', [('==',['ADJ'])], 'total'),
-    ('Adverbs', 'Token', 'pos_', [('==',['ADV'])], 'total'),
-    ('Nouns', 'Token', 'pos_', [('==',['NOUN'])], 'total'),
-    ('Proper Nouns', 'Token', 'pos_', [('==',['PROPN'])], 'total'),
-    ('Verbs', 'Token', 'pos_', [('==',['VERB'])], 'total'),
-    ('Numbers', 'Token', 'pos_', [('==',['NUM'])], 'total'),
-    ('Prepositions', 'Token', 'pos_', [('==',['ADP'])], 'total'),
-    ('Coordinating Conjunction', 'Token', 'pos_', [('==',['CCONJ'])], 'total'),
-    ('Subordinating Conjunction', 'Token', 'pos_', [('==',['SCONJ'])], 'total'),
-    ('Auxiliary Verb', 'Token', 'pos_', [('==',['AUX'])], 'total'),
-    ('Pronoun', 'Token', 'pos_', [('==',['PRON'])], 'total'),
+    ('Adjectives', 'Token', 'pos_', [('==', ['ADJ'])], 'total'),
+    ('Adverbs', 'Token', 'pos_', [('==', ['ADV'])], 'total'),
+    ('Nouns', 'Token', 'pos_', [('==', ['NOUN'])], 'total'),
+    ('Proper Nouns', 'Token', 'pos_', [('==', ['PROPN'])], 'total'),
+    ('Verbs', 'Token', 'pos_', [('==', ['VERB'])], 'total'),
+    ('Numbers', 'Token', 'pos_', [('==', ['NUM'])], 'total'),
+    ('Prepositions', 'Token', 'pos_', [('==', ['ADP'])], 'total'),
+    ('Coordinating Conjunction', 'Token', 'pos_', [('==', ['CCONJ'])], 'total'),
+    ('Subordinating Conjunction', 'Token', 'pos_', [('==', ['SCONJ'])], 'total'),
+    ('Auxiliary Verb', 'Token', 'pos_', [('==', ['AUX'])], 'total'),
+    ('Pronoun', 'Token', 'pos_', [('==', ['PRON'])], 'total'),
     # sentence variety
     ('Sentence Types', 'Doc', 'sentence_types', None, 'counts'),
-    ('Simple Sentences', 'Doc', 'sentence_types',[('==',['Simple'])], 'total'),
-    ('Simple with Complex Predicates', 'Doc', 'sentence_types',[('==',['SimpleComplexPred'])], 'total'),
-    ('Simple with Compound Predicates', 'Doc', 'sentence_types',[('==',['SimpleCompoundPred'])], 'total'),
-    ('Simple with Compound Complex Predicates', 'Doc', 'sentence_types',[('==',['SimpleCompoundComplexPred'])], 'total'),
-    ('Compound Sentences', 'Doc', 'sentence_types',[('==',['Compound'])], 'total'),
-    ('Complex Sentences', 'Doc', 'sentence_types',[('==',['Complex'])], 'total'),
-    ('Compound Complex Sentences', 'Doc', 'sentence_types',[('==',['CompoundComplex'])], 'total'),
+    ('Simple Sentences', 'Doc', 'sentence_types', [('==', ['Simple'])], 'total'),
+    ('Simple with Complex Predicates', 'Doc', 'sentence_types', [('==', ['SimpleComplexPred'])], 'total'),
+    ('Simple with Compound Predicates', 'Doc', 'sentence_types', [('==', ['SimpleCompoundPred'])], 'total'),
+    ('Simple with Compound Complex Predicates', 'Doc', 'sentence_types', [('==', ['SimpleCompoundComplexPred'])], 'total'),
+    ('Compound Sentences', 'Doc', 'sentence_types', [('==', ['Compound'])], 'total'),
+    ('Complex Sentences', 'Doc', 'sentence_types', [('==', ['Complex'])], 'total'),
+    ('Compound Complex Sentences', 'Doc', 'sentence_types', [('==', ['CompoundComplex'])], 'total'),
     # Sources/Attributes/Citations/Quotes
     ('Information Sources', 'Token', 'vwp_source', None, 'percent'),
     ('Attributions', 'Token', 'vwp_attribution', None, 'percent'),
     ('Citations', 'Token', 'vwp_cite', None, 'percent'),
     ('Quoted Words', 'Token', 'vwp_quoted', None, 'percent'),
     # Dialogue
-    ('Direct Speech Verbs', 'Doc', 'vwp_direct_speech', None, 'percent'), # TODO needs new label
-    ('Indirect Speech Quotation', 'Token', 'vwp_in_direct_speech', None, 'percent'), # TODO needs new label
+    ('Direct Speech Verbs', 'Doc', 'vwp_direct_speech', None, 'percent'),  # TODO needs new label
+    ('Indirect Speech Quotation', 'Token', 'vwp_in_direct_speech', None, 'percent'),  # TODO needs new label
     # vwp_quoted - already used above
     # tone
-    ('Positive Tone', 'Token', 'vwp_tone', [('>',[.4])], 'percent'),
-    ('Negative Tone', 'Token', 'vwp_tone', [('<',[-.4])], 'percent'),
+    ('Positive Tone', 'Token', 'vwp_tone', [('>', [.4])], 'percent'),
+    ('Negative Tone', 'Token', 'vwp_tone', [('<', [-.4])], 'percent'),
     # details
     ('Concrete Details', 'Token', 'concrete_details', None, 'percent'),
     ('Main Idea Sentences', 'Doc', 'main_ideas', None, 'total'),
     ('Supporting Idea Sentences', 'Doc', 'supporting_ideas', None, 'total'),
     ('Supporting Detail Sentences', 'Doc', 'supporting_details', None, 'total'),
     # Other items
-    ('Polysyllabic Words', 'Token', 'nSyll', [('>',[3])], 'percent'),
-    ('Low Frequency Words', 'Token', 'max_freq', [('<',[4])], 'percent'),
+    ('Polysyllabic Words', 'Token', 'nSyll', [('>', [3])], 'percent'),
+    ('Low Frequency Words', 'Token', 'max_freq', [('<', [4])], 'percent'),
     ('Sentences', 'Doc', 'sents', None, 'total'),
     ('Paragraphs', 'Doc', 'delimiter_\n', None, 'total'),
     ('Character Trait Words', 'Token', 'vwp_character', None, 'percent'),
     ('In Past Tense', 'Token', 'in_past_tense_scope', None, 'percent'),
-    ('Propositional Attitudes', 'Doc', 'vwp_propositional_attitudes', None, 'percent'), # TODO
-    ('Social Awareness', 'Doc', 'vwp_social_awareness', None, 'percent') # TODO
+    ('Propositional Attitudes', 'Doc', 'vwp_propositional_attitudes', None, 'percent'),
+    ('Social Awareness', 'Doc', 'vwp_social_awareness', None, 'percent')
 ]
 
 # Create indicator dict to easily refer to each tuple above by name
@@ -115,6 +115,7 @@
     INDICATOR_W_IDS.append((id, ) + indicator)
     INDICATORS[id] = (id, ) + indicator
 
+
 class NLPIndicators(dataobject):
     id: str
     name: str
@@ -124,5 +125,6 @@ class NLPIndicators(dataobject):
     function: str
     # tooltip: str
 
+
 indicators = map(lambda ind: NLPIndicators(*ind), INDICATOR_W_IDS)
 INDICATOR_JSONS = [asdict(ind) for ind in indicators]
diff --git a/modules/writing_observer/writing_observer/sample_essays.py b/modules/writing_observer/writing_observer/sample_essays.py
diff --git a/modules/writing_observer/writing_observer/writing_analysis.py b/modules/writing_observer/writing_observer/writing_analysis.py