edanalytics · johncmerfeld · Jan 13, 2026 · Jan 5, 2026 · Jan 7, 2026 · Jan 7, 2026
diff --git a/executor/executor/executor.py b/executor/executor/executor.py
@@ -340,6 +340,8 @@ def earthmover_run(self):
         self.logger.info(f"Student ID types in Ed-Fi roster: {os.environ['EDFI_STUDENT_ID_TYPES']}")
 
         self.check_input_encoding()
+        fatal = False
+
         try:
             encoding_mod = []
             if self.input_sources["INPUT_FILE"]["is_plausible_non_utf8"]:
@@ -385,27 +387,23 @@ def earthmover_run(self):
                 except subprocess.CalledProcessError:
                     # failed again, move on to shutdown procedure
                     pass
+        finally:
+            #    the app relies on the presence of the unmatched students file but does not check
+            # whether it is populated. It is possible that Earthmover successfully matches students
+            # (so the file is empty) but then fails during data transformation. We need to check the
+            # match rates whether or not Earthmover succeeds, so that we don't accidentally tell the
+            # user there are unmatched students when there are none
+            self.record_highest_match_rate()
             if fatal:
                 # shut it down
-                if count_unmatched_students() == 0:
-                    #    the app relies on the presence of the unmatched students file but does not check
-                    # whether it is populated. It is possible that Earthmover successfully matches students
-                    # (so the file is empty) but then fails during data transformation. By skipping the
-                    # upload, we avoid the possibility of telling the user there were unmatched students when
-                    # in fact there were none
-                    self.logger.debug("no unmatched students. Skipping upload")
-                    artifact.UNMATCHED_STUDENTS.needs_upload = False
                 self.error = error.EarthmoverRunError()
                 raise
 
         self.upload_artifact(artifact.EM_RESULTS)
         self.upload_artifact(artifact.MATCH_RATES)
 
-        #    If we reach this point, it's pretty likely that the input file was basically compatible with 
-        # the bundle. However, we still need to check whether student ID matching was at least partially 
-        # successful. It used to be that the student_ids bundle would halt execution when there weren't
-        # enough matches, but those days are over; we handle it ourselves.
-        self.handle_unmatched_students()
+        # If we reach this point, it's likely that the input file was basically compatible with the bundle
+        self.report_unmatched_students()
 
     def check_input_encoding(self):
         """Determine whether assessment file should be loaded with a non-UTF-8 encoding"""
@@ -515,65 +513,66 @@ def unpack_id_types(self):
         else:
             self.stu_unique_id_in_roster = None
 
-    def handle_unmatched_students(self):
-        """Alert the app to the existence of unmatched students (if any) and the best candidate for ID matching"""
+    def record_highest_match_rate(self):
+        """Read the match rates file to determine which ID was the best match and how many student IDs failed to match it"""
         #    Let's make one thing very clear: There is an unmatched students file and a match rates file.
         # The unmatched students file is like the input file, filtered for any records that do not match the
         # **primary** ID used. e.g. If 6/10 of the input records match a state ID and 2/10 others match a
         # local ID, then the 4/10 without matching state IDs go in the unmatched students file, but both types
         # of match are recorded in the match rates file. Both are used for different purposes by the app and
         # within the exeuctor
-
-        # if there are no unmatched students, there is no unmatched students file
-        num_unmatched = count_unmatched_students()
-        if num_unmatched == 0:
-            self.logger.debug("no unmatched students. Skipping upload")
-            # mark so that we don't try to upload it at the end
-            artifact.UNMATCHED_STUDENTS.needs_upload = False
-            return
-
-        self.logger.warning('earthmover run produced unmatched inputs')
-
         with open(artifact.MATCH_RATES.path) as f:
             match_rates = [
                 {k: v for k, v in row.items()}
                 for row in csv.DictReader(f, skipinitialspace=True)
             ]
 
-        sufficient_matches = False
-        highest_match_rate = 0.0
-        highest_match_id_name = "N/A"
-        highest_match_id_type = "N/A"
+        # in the case of literally zero matches, the file is empty and these defaults remain
+        self.highest_match_rate = 0.0
+        self.highest_match_id_name = "N/A"
+        self.highest_match_id_type = "N/A"
+        self.num_unmatched_students = 0
 
-        # (a) if there are truly no matches, the match rates file has a header and no entries
         if len(match_rates) > 0:
             self.logger.info(f"some records matched - match rates by ID: {match_rates}")
+
             highest_match = sorted(match_rates, reverse=True, key=lambda mr: float(mr['match_rate']))[0]
-            highest_match_rate = float(highest_match["match_rate"])
-            highest_match_id_name = highest_match["source_column_name"]
-            highest_match_id_type = highest_match["edfi_column_name"]
-
-            # (b)
-            if highest_match_rate >= config.REQUIRED_ID_MATCH_RATE:
-                #    in this case, there are unmatched students but not so many that we doubt the
-                # integrity of the file. Send the ones that match and give the rest back to the user
-                sufficient_matches = True
-                # if an "actual" ID is replicated as studentUniqueId, we should send the actual ID to the user
-                if highest_match_id_type == "studentUniqueId" and self.stu_unique_id_in_roster:
-                    highest_match_id_type = self.stu_unique_id_in_roster
-
-                # additional context so the app can help the user fix their file
-                self.send_id_matches(highest_match_id_name, highest_match_id_type, num_unmatched)
-                self.upload_artifact(artifact.UNMATCHED_STUDENTS)
-        if not sufficient_matches:
-            #    If we made it here, then either (a) or (b) is FALSE and thus the input file is no good.
-            # Don't bother uploading anything and instead show the user a specific error
-            self.error = error.InsufficientMatchesError(highest_match_rate, config.REQUIRED_ID_MATCH_RATE, highest_match_id_name, highest_match_id_type)
+            self.highest_match_rate = float(highest_match["match_rate"])
+            self.highest_match_id_name = highest_match["source_column_name"]
+            self.highest_match_id_type = highest_match["edfi_column_name"]
+            self.num_unmatched_students = int(highest_match["num_rows"]) - int(highest_match["num_matches"])
+
+        if self.num_unmatched_students == 0:
+            self.logger.debug("no unmatched students. Skipping upload of unmatched students file")
+            artifact.UNMATCHED_STUDENTS.needs_upload = False
+
+    def report_unmatched_students(self):
+        """Alert the app to the existence of unmatched students (if any) and the best candidate for ID matching"""
+        if self.num_unmatched_students == 0:
+            return
+
+        self.logger.warning('earthmover run failed to match some student IDs')
+
+        if self.highest_match_rate >= config.REQUIRED_ID_MATCH_RATE:
+            #    in this case, there are unmatched students but not so many that we doubt the
+            # integrity of the file. Send the ones that match and give the rest back to the user
+            # if an "actual" ID is replicated as studentUniqueId, we should send the actual ID to the user
+            id_type_to_report = self.highest_match_id_type
+            if self.highest_match_id_type == "studentUniqueId" and self.stu_unique_id_in_roster:
+                id_type_to_report = self.stu_unique_id_in_roster
+
+            # additional context so the app can help the user fix their file
+            self.send_id_matches(self.highest_match_id_name, id_type_to_report, self.num_unmatched_students)
+            self.upload_artifact(artifact.UNMATCHED_STUDENTS)
+        else:
+            #    Insufficient matches. Assume the input file is no good Don't bother uploading anything.
+            # Instead, alert the user with an error
+            self.error = error.InsufficientMatchesError(self.highest_match_rate, config.REQUIRED_ID_MATCH_RATE, self.highest_match_id_name, self.highest_match_id_type)
             #    For now, since we're asking the user to revisit their entire file, it's simpler if we don't
             # return the unmatched students file at all
             self.logger.debug("too many unmatched students. Skipping upload")
             artifact.UNMATCHED_STUDENTS.needs_upload = False
-            raise ValueError(f"insufficient ID matches to continue (highest rate {highest_match_rate} < required {config.REQUIRED_ID_MATCH_RATE}; ID column name: {highest_match_id_name}; Ed-Fi ID type: {highest_match_id_type})")
+            raise ValueError(f"insufficient ID matches to continue (highest rate {self.highest_match_rate} < required {config.REQUIRED_ID_MATCH_RATE}; ID column name: {self.highest_match_id_name}; Ed-Fi ID type: {self.highest_match_id_type})")
 
     def lightbeam_send(self):
         """Upload Earthmover's outputs to the ODS"""
@@ -703,17 +702,6 @@ def send_summary(self):
         self.conn.post(self.summary_url, json=self.summary)
 
 
-def count_unmatched_students():
-    """Returns the number of data rows in Earthmover's unmatched students file"""
-    try:
-        with open(artifact.UNMATCHED_STUDENTS.path) as f:
-                #   header doesn't count - also, when you open this file in an IDE, it may appear to have an extra blank line,
-                # which would lead to false positives when there are no unmatched students. Python and programs like `wc -l`
-                # nonetheless appear to read it in as a single-line file, so this is correct
-            return max(sum(1 for _ in f) - 1, 0)
-    except FileNotFoundError:
-        return 0
-
 def localize_s3_path(path):
     """Convert an S3 'path' to a single filename"""
     return path.replace("/", "__")
diff --git a/executor/requirements.txt b/executor/requirements.txt
@@ -1,6 +1,6 @@
 awscli
 boto3
 chardet
-earthmover==0.4.7
+earthmover==0.4.8
 lightbeam==0.1.10
 requests