Skip to content

Commit 3bda6da

Browse files
2 parents 21935d3 + 6f9e3ce commit 3bda6da

File tree

6 files changed

+97
-94
lines changed

6 files changed

+97
-94
lines changed

.github/workflows/pycodestyle.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@ jobs:
2020
pip install pycodestyle
2121
- name: Analysing the code with pycodestyle
2222
run: |
23-
pycodestyle --ignore=E501,W503 $(git ls-files 'learning_observer/*.py' 'modules/*.py')
23+
pycodestyle --ignore=E501,W503,E731 $(git ls-files 'learning_observer/*.py' 'modules/*.py')

modules/writing_observer/writing_observer/aggregator.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,15 @@
22
import time
33

44
import learning_observer.settings
5+
import learning_observer.stream_analytics.helpers
56
import learning_observer.util
67

78

89
def excerpt_active_text(
910
text, cursor_position,
10-
desired_length=103, cursor_target=2/3, max_overflow=10,
11-
cursor_character = "❙"
12-
):
11+
desired_length=103, cursor_target=2 / 3, max_overflow=10,
12+
cursor_character="❙"
13+
):
1314
'''
1415
This function returns a short segment of student text, cutting in a
1516
sensible way around word boundaries. This can be used for real-time
@@ -122,8 +123,6 @@ def aggregate_course_summary_stats(student_data):
122123
#
123124
######
124125

125-
import learning_observer.stream_analytics.helpers
126-
127126

128127
async def get_latest_student_documents(student_data):
129128
'''

modules/writing_observer/writing_observer/awe_nlp.py

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
RUN_MODES = enum.Enum('RUN_MODES', 'MULTIPROCESSING SERIAL')
3232

33+
3334
def init_nlp():
3435
'''
3536
Initialize the spacy pipeline with the AWE components. This takes a while
@@ -51,6 +52,7 @@ def init_nlp():
5152
nlp.add_pipe('contentsegmentation')
5253
return nlp
5354

55+
5456
nlp = init_nlp()
5557

5658

@@ -64,39 +66,37 @@ def outputIndicator(doc, indicatorName, itype, stype=None, text=None, added_filt
6466
indicator = {}
6567

6668
if added_filter is None:
67-
theFilter = [(indicatorName,[True]),('is_alpha',[True])]
69+
theFilter = [(indicatorName, [True]), ('is_alpha', [True])]
6870
else:
6971
theFilter = added_filter
70-
theFilter.append(('is_alpha',[True]))
72+
theFilter.append(('is_alpha', [True]))
7173

7274
indicator['metric'] =\
7375
doc._.AWE_Info(infoType=itype,
74-
indicator=indicatorName,
75-
filters=theFilter,
76-
summaryType=stype)
77-
76+
indicator=indicatorName,
77+
filters=theFilter,
78+
summaryType=stype)
79+
7880
data = json.loads(
7981
doc._.AWE_Info(infoType=itype,
80-
indicator=indicatorName,
81-
filters=theFilter)).values()
82+
indicator=indicatorName,
83+
filters=theFilter)).values()
8284

8385
indicator['offsets'] = \
84-
[[entry['offset'],entry['length']] \
85-
for entry \
86-
in data]
86+
[[entry['offset'], entry['length']] for entry in data]
8787

8888
if itype == 'Token':
8989
indicator['text'] = \
9090
json.loads(doc._.AWE_Info(infoType=itype,
91-
indicator=indicatorName,
92-
filters=theFilter,
93-
transformations=['lemma'],
94-
summaryType='uniq'))
91+
indicator=indicatorName,
92+
filters=theFilter,
93+
transformations=['lemma'],
94+
summaryType='uniq'))
9595
else:
9696
indicator['text'] = []
9797

9898
for span in indicator['offsets']:
99-
indicator['text'].append(text[int(span[0]):int(span[0])+int(span[1])])
99+
indicator['text'].append(text[int(span[0]):int(span[0]) + int(span[1])])
100100

101101
return indicator
102102

@@ -148,6 +148,7 @@ async def process_texts_serial(texts, options=None):
148148

149149
executor = None
150150

151+
151152
def run_in_fork(func):
152153
'''
153154
This will run a function in a forked subprocess, for isolation.
@@ -191,7 +192,7 @@ async def process_texts_parallel(texts, options=None):
191192
try:
192193
annotations = await result_future
193194
annotations['text'] = text
194-
except: # awe_components.errors.AWE_Workbench_Error and nltk.corpus.reader.wordnet.WordNetError
195+
except Exception:
195196
raise
196197
annotations = "Error"
197198
annotated.append(annotations)
@@ -250,7 +251,7 @@ async def process_texts(writing_data, options=None, mode=RUN_MODES.MULTIPROCESSI
250251
return results
251252

252253

253-
if __name__ == '__main__':
254+
if __name__ == '__main__':
254255
import time
255256
import writing_observer.sample_essays
256257
# Run over a sample text
@@ -270,9 +271,9 @@ async def process_texts(writing_data, options=None, mode=RUN_MODES.MULTIPROCESSI
270271
results3 = asyncio.run(process_texts_serial(example_texts[0:8]))
271272
t4 = time.time()
272273
print(results2)
273-
print("Single time", t2-t1)
274-
print("Parallel time", t3-t2)
275-
print("Serial time", t4-t3)
274+
print("Single time", t2 - t1)
275+
print("Parallel time", t3 - t2)
276+
print("Serial time", t4 - t3)
276277
print("Note that these results are imperfect -- ")
277-
print("Errors", len([r for r in results2 if r=="Error"]))
278-
print("Errors", [r if r=="Error" else "--" for r in results2])
278+
print("Errors", len([r for r in results2 if r == "Error"]))
279+
print("Errors", [r if r == "Error" else "--" for r in results2])

modules/writing_observer/writing_observer/nlp_indicators.py

Lines changed: 48 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Define a set of indicators with the kind of filtering/summarization we want
44
#
55
# Academic Language, Latinate Words, Low Frequency Words, Adjectives, Adverbs,
6-
# Sentences, Paragraphs --
6+
# Sentences, Paragraphs --
77
# just need to have lexicalfeatures in the pipeline to run.
88
#
99
# Transition Words, Ordinal Transition Words --
@@ -35,76 +35,76 @@
3535
('Statements of Opinion', 'Doc', 'vwp_statements_of_opinion', None, 'percent'),
3636
('Statements of Fact', 'Doc', 'vwp_statements_of_fact', None, 'percent'),
3737
# Transitions
38-
# eventually we want to exclude \n\n as transitions using `[('!=',['introductory'])]`
38+
# eventually we want to exclude \n\n as transitions using `[('!=', ['introductory'])]`
3939
# however the introductory category also includes "let us" and "let's"
4040
# no highlighting is shown on the new lines, so we won't remove it for now.
4141
('Transition Words', 'Doc', 'transitions', None, 'counts'),
4242
#
43-
('Positive Transition Words', 'Doc', 'transitions',[('==',['positive'])], 'total'),
44-
('Conditional Transition Words', 'Doc', 'transitions',[('==',['conditional'])], 'total'),
45-
('Consequential Transition Words', 'Doc', 'transitions',[('==',['consequential'])], 'total'),
46-
('Contrastive Transition Words', 'Doc', 'transitions',[('==',['contrastive'])], 'total'),
47-
('Counterpoint Transition Words', 'Doc', 'transitions',[('==',['counterpoint'])], 'total'),
48-
('Comparative Transition Words', 'Doc', 'transitions',[('==',['comparative'])], 'total'),
49-
('Cross Referential Transition Words', 'Doc', 'transitions',[('==',['crossreferential'])], 'total'),
50-
('Illustrative Transition Words', 'Doc', 'transitions',[('==',['illustrative'])], 'total'),
51-
('Negative Transition Words', 'Doc', 'transitions',[('==',['negative'])], 'total'),
52-
('Emphatic Transition Words', 'Doc', 'transitions',[('==',['emphatic'])], 'total'),
53-
('Evenidentiary Transition Words', 'Doc', 'transitions',[('==',['evidentiary'])], 'total'),
54-
('General Transition Words', 'Doc', 'transitions',[('==',['general'])], 'total'),
55-
('Ordinal Transition Words', 'Doc', 'transitions',[('==',['ordinal'])], 'total'),
56-
('Purposive Transition Words', 'Doc', 'transitions',[('==',['purposive'])], 'total'),
57-
('Periphrastic Transition Words', 'Doc', 'transitions',[('==',['periphrastic'])], 'total'),
58-
('Hypothetical Transition Words', 'Doc', 'transitions',[('==',['hypothetical'])], 'total'),
59-
('Summative Transition Words', 'Doc', 'transitions',[('==',['summative'])], 'total'),
60-
('Introductory Transition Words', 'Doc', 'transitions',[('==',['introductory'])], 'total'),
43+
('Positive Transition Words', 'Doc', 'transitions', [('==', ['positive'])], 'total'),
44+
('Conditional Transition Words', 'Doc', 'transitions', [('==', ['conditional'])], 'total'),
45+
('Consequential Transition Words', 'Doc', 'transitions', [('==', ['consequential'])], 'total'),
46+
('Contrastive Transition Words', 'Doc', 'transitions', [('==', ['contrastive'])], 'total'),
47+
('Counterpoint Transition Words', 'Doc', 'transitions', [('==', ['counterpoint'])], 'total'),
48+
('Comparative Transition Words', 'Doc', 'transitions', [('==', ['comparative'])], 'total'),
49+
('Cross Referential Transition Words', 'Doc', 'transitions', [('==', ['crossreferential'])], 'total'),
50+
('Illustrative Transition Words', 'Doc', 'transitions', [('==', ['illustrative'])], 'total'),
51+
('Negative Transition Words', 'Doc', 'transitions', [('==', ['negative'])], 'total'),
52+
('Emphatic Transition Words', 'Doc', 'transitions', [('==', ['emphatic'])], 'total'),
53+
('Evenidentiary Transition Words', 'Doc', 'transitions', [('==', ['evidentiary'])], 'total'),
54+
('General Transition Words', 'Doc', 'transitions', [('==', ['general'])], 'total'),
55+
('Ordinal Transition Words', 'Doc', 'transitions', [('==', ['ordinal'])], 'total'),
56+
('Purposive Transition Words', 'Doc', 'transitions', [('==', ['purposive'])], 'total'),
57+
('Periphrastic Transition Words', 'Doc', 'transitions', [('==', ['periphrastic'])], 'total'),
58+
('Hypothetical Transition Words', 'Doc', 'transitions', [('==', ['hypothetical'])], 'total'),
59+
('Summative Transition Words', 'Doc', 'transitions', [('==', ['summative'])], 'total'),
60+
('Introductory Transition Words', 'Doc', 'transitions', [('==', ['introductory'])], 'total'),
6161
# pos_
62-
('Adjectives', 'Token', 'pos_', [('==',['ADJ'])], 'total'),
63-
('Adverbs', 'Token', 'pos_', [('==',['ADV'])], 'total'),
64-
('Nouns', 'Token', 'pos_', [('==',['NOUN'])], 'total'),
65-
('Proper Nouns', 'Token', 'pos_', [('==',['PROPN'])], 'total'),
66-
('Verbs', 'Token', 'pos_', [('==',['VERB'])], 'total'),
67-
('Numbers', 'Token', 'pos_', [('==',['NUM'])], 'total'),
68-
('Prepositions', 'Token', 'pos_', [('==',['ADP'])], 'total'),
69-
('Coordinating Conjunction', 'Token', 'pos_', [('==',['CCONJ'])], 'total'),
70-
('Subordinating Conjunction', 'Token', 'pos_', [('==',['SCONJ'])], 'total'),
71-
('Auxiliary Verb', 'Token', 'pos_', [('==',['AUX'])], 'total'),
72-
('Pronoun', 'Token', 'pos_', [('==',['PRON'])], 'total'),
62+
('Adjectives', 'Token', 'pos_', [('==', ['ADJ'])], 'total'),
63+
('Adverbs', 'Token', 'pos_', [('==', ['ADV'])], 'total'),
64+
('Nouns', 'Token', 'pos_', [('==', ['NOUN'])], 'total'),
65+
('Proper Nouns', 'Token', 'pos_', [('==', ['PROPN'])], 'total'),
66+
('Verbs', 'Token', 'pos_', [('==', ['VERB'])], 'total'),
67+
('Numbers', 'Token', 'pos_', [('==', ['NUM'])], 'total'),
68+
('Prepositions', 'Token', 'pos_', [('==', ['ADP'])], 'total'),
69+
('Coordinating Conjunction', 'Token', 'pos_', [('==', ['CCONJ'])], 'total'),
70+
('Subordinating Conjunction', 'Token', 'pos_', [('==', ['SCONJ'])], 'total'),
71+
('Auxiliary Verb', 'Token', 'pos_', [('==', ['AUX'])], 'total'),
72+
('Pronoun', 'Token', 'pos_', [('==', ['PRON'])], 'total'),
7373
# sentence variety
7474
('Sentence Types', 'Doc', 'sentence_types', None, 'counts'),
75-
('Simple Sentences', 'Doc', 'sentence_types',[('==',['Simple'])], 'total'),
76-
('Simple with Complex Predicates', 'Doc', 'sentence_types',[('==',['SimpleComplexPred'])], 'total'),
77-
('Simple with Compound Predicates', 'Doc', 'sentence_types',[('==',['SimpleCompoundPred'])], 'total'),
78-
('Simple with Compound Complex Predicates', 'Doc', 'sentence_types',[('==',['SimpleCompoundComplexPred'])], 'total'),
79-
('Compound Sentences', 'Doc', 'sentence_types',[('==',['Compound'])], 'total'),
80-
('Complex Sentences', 'Doc', 'sentence_types',[('==',['Complex'])], 'total'),
81-
('Compound Complex Sentences', 'Doc', 'sentence_types',[('==',['CompoundComplex'])], 'total'),
75+
('Simple Sentences', 'Doc', 'sentence_types', [('==', ['Simple'])], 'total'),
76+
('Simple with Complex Predicates', 'Doc', 'sentence_types', [('==', ['SimpleComplexPred'])], 'total'),
77+
('Simple with Compound Predicates', 'Doc', 'sentence_types', [('==', ['SimpleCompoundPred'])], 'total'),
78+
('Simple with Compound Complex Predicates', 'Doc', 'sentence_types', [('==', ['SimpleCompoundComplexPred'])], 'total'),
79+
('Compound Sentences', 'Doc', 'sentence_types', [('==', ['Compound'])], 'total'),
80+
('Complex Sentences', 'Doc', 'sentence_types', [('==', ['Complex'])], 'total'),
81+
('Compound Complex Sentences', 'Doc', 'sentence_types', [('==', ['CompoundComplex'])], 'total'),
8282
# Sources/Attributes/Citations/Quotes
8383
('Information Sources', 'Token', 'vwp_source', None, 'percent'),
8484
('Attributions', 'Token', 'vwp_attribution', None, 'percent'),
8585
('Citations', 'Token', 'vwp_cite', None, 'percent'),
8686
('Quoted Words', 'Token', 'vwp_quoted', None, 'percent'),
8787
# Dialogue
88-
('Direct Speech Verbs', 'Doc', 'vwp_direct_speech', None, 'percent'), # TODO needs new label
89-
('Indirect Speech Quotation', 'Token', 'vwp_in_direct_speech', None, 'percent'), # TODO needs new label
88+
('Direct Speech Verbs', 'Doc', 'vwp_direct_speech', None, 'percent'), # TODO needs new label
89+
('Indirect Speech Quotation', 'Token', 'vwp_in_direct_speech', None, 'percent'), # TODO needs new label
9090
# vwp_quoted - already used above
9191
# tone
92-
('Positive Tone', 'Token', 'vwp_tone', [('>',[.4])], 'percent'),
93-
('Negative Tone', 'Token', 'vwp_tone', [('<',[-.4])], 'percent'),
92+
('Positive Tone', 'Token', 'vwp_tone', [('>', [.4])], 'percent'),
93+
('Negative Tone', 'Token', 'vwp_tone', [('<', [-.4])], 'percent'),
9494
# details
9595
('Concrete Details', 'Token', 'concrete_details', None, 'percent'),
9696
('Main Idea Sentences', 'Doc', 'main_ideas', None, 'total'),
9797
('Supporting Idea Sentences', 'Doc', 'supporting_ideas', None, 'total'),
9898
('Supporting Detail Sentences', 'Doc', 'supporting_details', None, 'total'),
9999
# Other items
100-
('Polysyllabic Words', 'Token', 'nSyll', [('>',[3])], 'percent'),
101-
('Low Frequency Words', 'Token', 'max_freq', [('<',[4])], 'percent'),
100+
('Polysyllabic Words', 'Token', 'nSyll', [('>', [3])], 'percent'),
101+
('Low Frequency Words', 'Token', 'max_freq', [('<', [4])], 'percent'),
102102
('Sentences', 'Doc', 'sents', None, 'total'),
103103
('Paragraphs', 'Doc', 'delimiter_\n', None, 'total'),
104104
('Character Trait Words', 'Token', 'vwp_character', None, 'percent'),
105105
('In Past Tense', 'Token', 'in_past_tense_scope', None, 'percent'),
106-
('Propositional Attitudes', 'Doc', 'vwp_propositional_attitudes', None, 'percent'), # TODO
107-
('Social Awareness', 'Doc', 'vwp_social_awareness', None, 'percent') # TODO
106+
('Propositional Attitudes', 'Doc', 'vwp_propositional_attitudes', None, 'percent'),
107+
('Social Awareness', 'Doc', 'vwp_social_awareness', None, 'percent')
108108
]
109109

110110
# Create indicator dict to easily refer to each tuple above by name
@@ -115,6 +115,7 @@
115115
INDICATOR_W_IDS.append((id, ) + indicator)
116116
INDICATORS[id] = (id, ) + indicator
117117

118+
118119
class NLPIndicators(dataobject):
119120
id: str
120121
name: str
@@ -124,5 +125,6 @@ class NLPIndicators(dataobject):
124125
function: str
125126
# tooltip: str
126127

128+
127129
indicators = map(lambda ind: NLPIndicators(*ind), INDICATOR_W_IDS)
128130
INDICATOR_JSONS = [asdict(ind) for ind in indicators]

0 commit comments

Comments
 (0)