Skip to content

Commit 9f96e25

Browse files
committed
Fix the document alignment problem: the reorder had broken the connection between doc and student, and we were pulling in errors from inactive students. This commit fixes both issues.
1 parent 2818dae commit 9f96e25

File tree

1 file changed

+60
-9
lines changed

1 file changed

+60
-9
lines changed

modules/writing_observer/writing_observer/aggregator.py

Lines changed: 60 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import time
2+
import traceback
23
import learning_observer.util
34

45
def excerpt_active_text(
@@ -125,7 +126,16 @@ def aggregate_course_summary_stats(student_data):
125126
async def get_latest_student_documents(student_data):
126127
'''
127128
This will retrieve the latest student documents from the database. It breaks
128-
abstractions.
129+
abstractions.
130+
131+
It also involves some excess loops that are annoying but briefly we need to
132+
determine which students actually *have* last writing data. Then we need to
133+
go through and build keys for that data. Then we fetch the data itself.
134+
Later on in this file we need to marry the information again. This builds
135+
up a series of lists which are successively merged into a single dict with
136+
the resulting data.
137+
138+
Some of what is copied along is clearly duplicative and probably unneeded.
129139
'''
130140
import learning_observer.kvs
131141

@@ -134,6 +144,11 @@ async def get_latest_student_documents(student_data):
134144

135145
kvs = learning_observer.kvs.KVS()
136146

147+
# Compile a list of the active students defined as those
148+
# for which we have some "last_document"
149+
active_students = [s for s in student_data if 'writing_observer.writing_analysis.last_document' in s]
150+
151+
# Now collect documents for all of the active students.
137152
document_keys = ([
138153
learning_observer.stream_analytics.helpers.make_key(
139154
writing_observer.writing_analysis.reconstruct,
@@ -142,13 +157,29 @@ async def get_latest_student_documents(student_data):
142157
EventField('doc_id'): s['writing_observer.writing_analysis.last_document']['document_id']
143158
},
144159
KeyStateType.INTERNAL
145-
) for s in student_data])
146-
147-
writing_data = await kvs.multiget(keys=document_keys)
160+
) for s in active_students]) # in student_data if 'writing_observer.writing_analysis.last_document' in s])
161+
162+
kvs_data = await kvs.multiget(keys=document_keys)
148163

164+
149165
# Return blank entries if no data, rather than None. This makes it possible
150-
# to use item.get with defaults sanely.
151-
writing_data = [{} if item is None else item for item in writing_data]
166+
# to use item.get with defaults sanely. For the sake of later alignment
167+
# we also zip up the items with the keys and users that they come from
168+
# this hack allows us to align them after cleaning occurs later.
169+
# writing_data = [{} if item is None else item for item in writing_data]
170+
writing_data = []
171+
for idx in range(len(document_keys)):
172+
student = active_students[idx]
173+
doc = kvs_data[idx]
174+
175+
# If we have an empty item we simply return an empty dict with the
176+
# student but an empty doc value.
177+
if (doc is None): doc = {}
178+
179+
# Now insert the student data and pass it along.
180+
doc['student'] = student
181+
writing_data.append(doc)
182+
152183
return writing_data
153184

154185

@@ -167,6 +198,7 @@ async def merge_with_student_data(writing_data, student_data):
167198
'''
168199
Add the student metadata to each text
169200
'''
201+
170202
for item, student in zip(writing_data, student_data):
171203
if 'edit_metadata' in item:
172204
del item['edit_metadata']
@@ -179,16 +211,35 @@ async def merge_with_student_data(writing_data, student_data):
179211
async def latest_data(student_data):
180212
'''
181213
HACK HACK HACK
182-
183-
I just hardcoded this, breaking abstractions, repeating code, etc.
214+
215+
This code needs to take the student data as a dict and then
216+
collect the latest writing data for each student (assuming
217+
they have it). The code then passes that writing data on
218+
to Paul's code for processing. For the time being this
219+
works by essentially building up some large dicts that
220+
contain the text and student data together.
221+
222+
In the long run this should *all* be replaced by a cleaner
223+
object interface that hides some of this from the user
224+
but for now we'll roll with this.
184225
'''
226+
# Get the latest documents with the students appended.
185227
writing_data = await get_latest_student_documents(student_data)
228+
229+
# Strip out the unnecessary extra data.
186230
writing_data = await remove_extra_data(writing_data)
187-
writing_data = await merge_with_student_data(writing_data, student_data)
231+
232+
# The merge process is not needed at the moment because we are making
233+
# a simple alignment.
234+
#writing_data = await merge_with_student_data(writing_data, student_data)
235+
188236
just_the_text = [w.get("text", "") for w in writing_data]
237+
189238
annotated_texts = await writing_observer.awe_nlp.process_texts_parallel(just_the_text)
239+
190240
for annotated_text, single_doc in zip(annotated_texts, writing_data):
191241
if annotated_text != "Error":
192242
single_doc.update(annotated_text)
193243
# Call Paul's code to add stuff to it
244+
194245
return {'latest_writing_data': writing_data}

0 commit comments

Comments
 (0)