simpeg · kkappler · Sep 8, 2022 · Sep 2, 2022 · Sep 3, 2022
diff --git a/aurora/pipelines/process_mth5.py b/aurora/pipelines/process_mth5.py
@@ -458,24 +458,17 @@ def process_mth5(
     else:
         local_station_id = processing_config.stations.local.id
         station_metadata = tfk_dataset.get_station_metadata(local_station_id)
-
-        # https://github.com/kujaku11/mt_metadata/issues/90 (Do we need if/else here?)
-        #
-        # Also, assuming mth5 file versions are either 0.1.0 or 0.2.0, and not yet
-        # looking at mixed versions -- although that could happen.  That is something
-        # to check earlier, like when we populate data dataset_df
-        if len(mth5_objs) == 1:
-            key = list(mth5_objs.keys())[0]
-            if mth5_objs[key].file_version == "0.1.0":
-                survey_dict = mth5_objs[key].survey_group.metadata.to_dict()
-            elif mth5_objs[key].file_version == "0.2.0":
-                survey_dict = mth5_objs[key].surveys_group.metadata.to_dict()
-        else:
-            print("WARN: Need test for multiple mth5 objs for non-tf_collection output")
-            print("WARN: Also need to add handling of 0.1.0 vs 0.2.0 mth5 file_version")
-            key = list(mth5_objs.keys())[0]
-            survey_dict = mth5_objs[key].survey_group.metadata.to_dict()
-            # raise Exception
+        local_mth5_obj = mth5_objs[local_station_id]
+
+        if local_mth5_obj.file_version == "0.1.0":
+            survey_dict = local_mth5_obj.survey_group.metadata.to_dict()
+        elif local_mth5_obj.file_version == "0.2.0":
+            # this could be a method of tf_kernel.get_survey_dict()
+            survey_id = dataset_df[
+                dataset_df["station_id"] == local_station_id
+            ].survey.unique()[0]
+            survey_obj = local_mth5_obj.get_survey(survey_id)
+            survey_dict = survey_obj.metadata.to_dict()
 
         tf_cls = export_tf(
             tf_collection,

diff --git a/aurora/transfer_function/kernel_dataset.py b/aurora/transfer_function/kernel_dataset.py
@@ -118,6 +118,21 @@ def clone_dataframe(self):
         return copy.deepcopy(self.df)
 
     def from_run_summary(self, run_summary, local_station_id, remote_station_id=None):
+        """
+
+        Parameters
+        ----------
+        run_summary: aurora.pipelines.run_summary.RunSummary
+            Summary of available data for processing from one or more stations
+        local_station_id: string
+            Label of the station for which an estimate will be computed
+        remote_station_id: string
+            Label of the remote reference station
+
+        Returns
+        -------
+
+        """
         self.local_station_id = local_station_id
         self.remote_station_id = remote_station_id
 
@@ -133,6 +148,9 @@ def from_run_summary(self, run_summary, local_station_id, remote_station_id=None
             df.remote = cond
 
         self.df = df
+        if remote_station_id:
+            self.restrict_run_intervals_to_simultaneous()
+        self._add_duration_column()
 
     @property
     def mini_summary(self):
@@ -142,19 +160,29 @@ def mini_summary(self):
     def print_mini_summary(self):
         print(self.mini_summary)
 
-    @property
-    def add_duration(self):
+    def _add_duration_column(self):
         """ """
         timedeltas = self.df.end - self.df.start
         durations = [x.total_seconds() for x in timedeltas]
         self.df["duration"] = durations
         return
 
     def drop_runs_shorter_than(self, duration, units="s"):
+        """
+        This needs to have duration refreshed before hand
+        Parameters
+        ----------
+        duration
+        units
+
+        Returns
+        -------
+
+        """
         if units != "s":
             raise NotImplementedError
         if "duration" not in self.df.columns:
-            self.add_duration
+            self._add_duration_column()
         drop_cond = self.df.duration < duration
         self.df.drop(self.df[drop_cond].index, inplace=True)
         self.df.reset_index(drop=True, inplace=True)
@@ -201,27 +229,26 @@ def restrict_run_intervals_to_simultaneous(self):
                 if intervals_overlap(
                     local_row.start, local_row.end, remote_row.start, remote_row.end
                 ):
-                    print(f"OVERLAP {i_local}, {i_remote}")
+                    # print(f"OVERLAP {i_local}, {i_remote}")
                     olap_start, olap_end = overlap(
                         local_row.start, local_row.end, remote_row.start, remote_row.end
                     )
-                    print(
-                        f"{olap_start} -- {olap_end}\n "
-                        f"{(olap_end-olap_start).seconds}s\n\n"
-                    )
+                    # print(
+                    #     f"{olap_start} -- {olap_end}\n "
+                    #     f"{(olap_end-olap_start).seconds}s\n\n"
+                    # )
 
                     local_sub_run = local_row.copy(deep=True)
-                    # local_sub_run.drop("index", inplace=True)
                     remote_sub_run = remote_row.copy(deep=True)
-                    # remote_sub_run.drop("index", inplace=True)
                     local_sub_run.start = olap_start
                     local_sub_run.end = olap_end
                     remote_sub_run.start = olap_start
                     remote_sub_run.end = olap_end
                     output_sub_runs.append(local_sub_run)
                     output_sub_runs.append(remote_sub_run)
                 else:
-                    print(f"NOVERLAP {i_local}, {i_remote}")
+                    pass
+                    # print(f"NOVERLAP {i_local}, {i_remote}")
         df = pd.DataFrame(output_sub_runs)
         df = df.reset_index(drop=True)
         self.df = df