Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 11 additions & 18 deletions aurora/pipelines/process_mth5.py
Original file line number Diff line number Diff line change
Expand Up @@ -458,24 +458,17 @@ def process_mth5(
else:
local_station_id = processing_config.stations.local.id
station_metadata = tfk_dataset.get_station_metadata(local_station_id)

# https://github.com/kujaku11/mt_metadata/issues/90 (Do we need if/else here?)
#
# Also, assuming mth5 file versions are either 0.1.0 or 0.2.0, and not yet
# looking at mixed versions -- although that could happen. That is something
# to check earlier, like when we populate data dataset_df
if len(mth5_objs) == 1:
key = list(mth5_objs.keys())[0]
if mth5_objs[key].file_version == "0.1.0":
survey_dict = mth5_objs[key].survey_group.metadata.to_dict()
elif mth5_objs[key].file_version == "0.2.0":
survey_dict = mth5_objs[key].surveys_group.metadata.to_dict()
else:
print("WARN: Need test for multiple mth5 objs for non-tf_collection output")
print("WARN: Also need to add handling of 0.1.0 vs 0.2.0 mth5 file_version")
key = list(mth5_objs.keys())[0]
survey_dict = mth5_objs[key].survey_group.metadata.to_dict()
# raise Exception
local_mth5_obj = mth5_objs[local_station_id]

if local_mth5_obj.file_version == "0.1.0":
survey_dict = local_mth5_obj.survey_group.metadata.to_dict()
elif local_mth5_obj.file_version == "0.2.0":
# this could be a method of tf_kernel.get_survey_dict()
survey_id = dataset_df[
dataset_df["station_id"] == local_station_id
].survey.unique()[0]
survey_obj = local_mth5_obj.get_survey(survey_id)
survey_dict = survey_obj.metadata.to_dict()

tf_cls = export_tf(
tf_collection,
Expand Down
49 changes: 38 additions & 11 deletions aurora/transfer_function/kernel_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,21 @@ def clone_dataframe(self):
return copy.deepcopy(self.df)

def from_run_summary(self, run_summary, local_station_id, remote_station_id=None):
"""

Parameters
----------
run_summary: aurora.pipelines.run_summary.RunSummary
Summary of available data for processing from one or more stations
local_station_id: string
Label of the station for which an estimate will be computed
remote_station_id: string
Label of the remote reference station

Returns
-------

"""
self.local_station_id = local_station_id
self.remote_station_id = remote_station_id

Expand All @@ -133,6 +148,9 @@ def from_run_summary(self, run_summary, local_station_id, remote_station_id=None
df.remote = cond

self.df = df
if remote_station_id:
self.restrict_run_intervals_to_simultaneous()
self._add_duration_column()

@property
def mini_summary(self):
Expand All @@ -142,19 +160,29 @@ def mini_summary(self):
def print_mini_summary(self):
print(self.mini_summary)

@property
def add_duration(self):
def _add_duration_column(self):
""" """
timedeltas = self.df.end - self.df.start
durations = [x.total_seconds() for x in timedeltas]
self.df["duration"] = durations
return

def drop_runs_shorter_than(self, duration, units="s"):
"""
This needs to have duration refreshed before hand
Parameters
----------
duration
units

Returns
-------

"""
if units != "s":
raise NotImplementedError
if "duration" not in self.df.columns:
self.add_duration
self._add_duration_column()
drop_cond = self.df.duration < duration
self.df.drop(self.df[drop_cond].index, inplace=True)
self.df.reset_index(drop=True, inplace=True)
Expand Down Expand Up @@ -201,27 +229,26 @@ def restrict_run_intervals_to_simultaneous(self):
if intervals_overlap(
local_row.start, local_row.end, remote_row.start, remote_row.end
):
print(f"OVERLAP {i_local}, {i_remote}")
# print(f"OVERLAP {i_local}, {i_remote}")
olap_start, olap_end = overlap(
local_row.start, local_row.end, remote_row.start, remote_row.end
)
print(
f"{olap_start} -- {olap_end}\n "
f"{(olap_end-olap_start).seconds}s\n\n"
)
# print(
# f"{olap_start} -- {olap_end}\n "
# f"{(olap_end-olap_start).seconds}s\n\n"
# )

local_sub_run = local_row.copy(deep=True)
# local_sub_run.drop("index", inplace=True)
remote_sub_run = remote_row.copy(deep=True)
# remote_sub_run.drop("index", inplace=True)
local_sub_run.start = olap_start
local_sub_run.end = olap_end
remote_sub_run.start = olap_start
remote_sub_run.end = olap_end
output_sub_runs.append(local_sub_run)
output_sub_runs.append(remote_sub_run)
else:
print(f"NOVERLAP {i_local}, {i_remote}")
pass
# print(f"NOVERLAP {i_local}, {i_remote}")
df = pd.DataFrame(output_sub_runs)
df = df.reset_index(drop=True)
self.df = df
Expand Down