From 14adee737550a30bf8f517d0290cc351dba3d970 Mon Sep 17 00:00:00 2001 From: nikita Date: Wed, 18 Sep 2019 15:27:14 +0300 Subject: [PATCH 01/45] modify dump --- docker_containers/picking_docker/picking_inference.py | 3 +-- seismicpro/src/seismic_batch.py | 10 ++++++---- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docker_containers/picking_docker/picking_inference.py b/docker_containers/picking_docker/picking_inference.py index 4a2f1a912..df3ce4249 100644 --- a/docker_containers/picking_docker/picking_inference.py +++ b/docker_containers/picking_docker/picking_inference.py @@ -5,7 +5,6 @@ import sys import argparse -import torch import numpy as np sys.path.append('../..') @@ -31,7 +30,7 @@ def make_prediction(): parser.add_argument('-ts', '--trace_len', type=int, help="The number of first samples \ of the trace to load.", default=751) parser.add_argument('-dvc', '--device', type=str or torch.device, help="The device for \ - inference. Can be 'cpu' or 'gpu'.", default=torch.device('cpu')) + inference. Can be 'cpu' or 'gpu'.", default='cpu') args = parser.parse_args() path_raw = args.path_raw model = args.path_model diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index a620d1eb2..4fa1ed313 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -448,7 +448,7 @@ def _dump_single_segy(self, src, path): return self @action - def _dump_picking(self, src, path, traces, to_samples, columns=None): + def _dump_picking(self, src, path, traces, to_samples, columns=None, max_len=[6, 4]): """Dump picking to file. Parameters @@ -486,9 +486,11 @@ def _dump_picking(self, src, path, traces, to_samples, columns=None): df = df.reset_index(drop=self.index.name is None)[columns] df.columns = df.columns.droplevel(1) - for i in [0, 2, 4]: - df.insert(i, str(i), "") - df.to_csv(path, index=False, sep='\t', header=False, encoding='ascii', mode='a') + with open(path, 'a') as f: + for row in df.iterrows(): + for i, item in enumerate(row[1][:-1]): + f.write(str(item).ljust(max_len[i] + 8)) + f.write(str(row[1][i+1]) + '\n') return self @action From 680985074553b8fa5d18b20774581e291a69ac07 Mon Sep 17 00:00:00 2001 From: nikita Date: Wed, 18 Sep 2019 21:11:14 +0300 Subject: [PATCH 02/45] added picking shift phase action --- .../picking_docker/picking_inference.py | 43 +++++++++++-------- seismicpro/src/seismic_batch.py | 27 ++++++++++-- 2 files changed, 49 insertions(+), 21 deletions(-) diff --git a/docker_containers/picking_docker/picking_inference.py b/docker_containers/picking_docker/picking_inference.py index df3ce4249..50bcd6a39 100644 --- a/docker_containers/picking_docker/picking_inference.py +++ b/docker_containers/picking_docker/picking_inference.py @@ -9,9 +9,9 @@ sys.path.append('../..') -from seismicpro.batchflow import Dataset, B -from seismicpro.batchflow.models.torch import UNet -from seismicpro.src import FieldIndex, TraceIndex, SeismicDataset +from seismicpro.batchflow import B, Pipeline +from seismicpro.batchflow.models.torch import UNet # pylint: disable=import-error +from seismicpro.src import TraceIndex, SeismicDataset def make_prediction(): """ Read the model and data paths and run inference pipeline. 
@@ -29,8 +29,9 @@ def make_prediction():
                         the batch for inference stage.", default=1000)
     parser.add_argument('-ts', '--trace_len', type=int, help="The number of first samples \
                         of the trace to load.", default=751)
-    parser.add_argument('-dvc', '--device', type=str or torch.device, help="The device for \
+    parser.add_argument('-dvc', '--device', type=str, help="The device for \
                         inference. Can be 'cpu' or 'gpu'.", default='cpu')
+    parser.add_argument('-s', '--shift', type=float, help="Picking time phase shift", default=0)
     args = parser.parse_args()
     path_raw = args.path_raw
     model = args.path_model
@@ -39,9 +40,10 @@ def make_prediction():
     batch_size = args.batch_size
     trace_len = args.trace_len
     device = args.device
-    predict(path_raw, model, num_zero, save_to, batch_size, trace_len, device)
+    shift = args.shift
+    predict(path_raw, model, num_zero, save_to, batch_size, trace_len, device, shift)

-def predict(path_raw, path_model, num_zero, save_to, batch_size, trace_len, device):
+def predict(path_raw, path_model, num_zero, save_to, batch_size, trace_len, device, shift):
     """Make predictions and dump results using loaded model and path to data.

     Parameters
@@ -60,6 +62,8 @@ def predict(path_raw, path_model, num_zero, save_to, batch_size, trace_len, devi
         The number of first samples in the trace to load to the pipeline.
     device: str or torch.device, default: 'cpu'
         The device used for inference. Can be 'gpu' in case of available GPU.
+    shift: float, default: 0
+        Picking time correction for the given shift.
     """
     data = SeismicDataset(TraceIndex(name='raw', path=path_raw))
@@ -75,19 +79,22 @@ def predict(path_raw, path_model, num_zero, save_to, batch_size, trace_len, devi
     except OSError:
         pass

-    test_pipeline = (data.p
-                     .init_model('dynamic', UNet, 'my_model', config=config_predict)
-                     .load(components='raw', fmt='segy', tslice=np.arange(trace_len))
-                     .drop_zero_traces(num_zero=num_zero, src='raw')
-                     .standardize(src='raw', dst='raw')
-                     .add_components(components='predictions')
-                     .apply_transform_all(src='raw', dst='raw', func=lambda x: np.stack(x))
-                     .predict_model('my_model', B('raw'), fetches='predictions',
-                                    save_to=B('predictions', mode='a'))
-                     .mask_to_pick(src='predictions', dst='predictions', labels=False)
-                     .dump(src='predictions', fmt='picks', path=save_to,
-                           traces='raw', to_samples=True))
+    test_tmpl = (data.p
+                 .init_model('dynamic', UNet, 'my_model', config=config_predict)
+                 .load(components='raw', fmt='segy', tslice=np.arange(trace_len))
+                 .drop_zero_traces(num_zero=num_zero, src='raw')
+                 .standardize(src='raw', dst='raw')
+                 .add_components(components='predictions')
+                 .apply_transform_all(src='raw', dst='raw', func=lambda x: np.stack(x))
+                 .predict_model('my_model', B('raw'), fetches='predictions',
+                                save_to=B('predictions', mode='a'))
+                 .mask_to_pick(src='predictions', dst='predictions', labels=False)
+                 )
+    if shift:
+        test_tmpl += Pipeline().shift_pick(src='predictions', dst='predictions', shift=np.pi*shift)
+    test_pipeline = test_tmpl + Pipeline().dump(src='predictions', fmt='picks', path=save_to,
+                                                traces='raw', to_samples=True)
     test_pipeline.run(batch_size, n_epochs=1, drop_last=False, shuffle=False, bar=True)

 if __name__ == "__main__":
diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py
index 4fa1ed313..71a0ee5d9 100644
--- a/seismicpro/src/seismic_batch.py
+++ b/seismicpro/src/seismic_batch.py
@@ -4,6 +4,7 @@
 import numpy as np
 import matplotlib.pyplot as plt
 from scipy import signal
+from scipy.signal import hilbert
 import pywt
 import segyio

@@ -448,7 +449,7 @@ def _dump_single_segy(self, src, path):
         return self

     @action
-    def _dump_picking(self, src, path, traces, to_samples, columns=None, max_len=[6, 4]):
+    def _dump_picking(self, src, path, traces, to_samples, columns=None, max_len=(6, 4)):
         """Dump picking to file.

         Parameters
@@ -490,7 +491,7 @@ def _dump_picking(self, src, path, traces, to_samples, columns=None, max_len=[6,
             for row in df.iterrows():
                 for i, item in enumerate(row[1][:-1]):
                     f.write(str(item).ljust(max_len[i] + 8))
-                f.write(str(row[1][i+1]) + '\n')
+                f.write(str(row[1][-1]) + '\n')
         return self

     @action
@@ -1122,7 +1123,7 @@ def mask_to_pick(self, src, dst, labels=True):
         if not labels:
             data = np.argmax(data, axis=1)

-        dst_data = massive_block(data)
+        dst_data = massive_block(np.stack(data))
         setattr(self, dst, np.array([i for i in dst_data] + [None])[:-1])
         return self

@@ -1253,3 +1254,23 @@ def equalize(self, index, src, dst, params, survey_id_col=None):
         getattr(self, dst)[pos] = equalized_field

         return self
+
+    @action
+    @inbatch_parallel(init='_init_component', target="threads")
+    def shift_pick(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, thd=0.2):
+        """ Shifts picking time by the given phase"""
+        pos = self.get_pos(None, src, index)
+        pick = getattr(self, src)[pos]
+        trace = getattr(self, src_raw)[pos]
+
+        analytic = hilbert(trace)
+        phase = np.unwrap(np.angle(analytic))
+        phase = np.squeeze(phase)
+
+        shifted_phase = phase[pick] - shift
+        phase_mod = np.abs(phase - shifted_phase)
+        raw_zero = phase_mod.argmin()
+        zero = np.where((np.abs(phase_mod - phase_mod[raw_zero])) < thd)[0][-1]
+
+        getattr(self, dst)[pos] = zero
+        return self
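The Hilbert-transform trick behind `shift_pick` is easy to try outside the batch framework. The following is an illustrative standalone sketch with synthetic data (not library code): it moves a "max"-type pick to an earlier phase of the same wavelet, the way the action above shifts picks by 1.5 * np.pi.

    import numpy as np
    from scipy.signal import hilbert

    t = np.linspace(0, 1, 751)
    trace = np.sin(2 * np.pi * 12 * t) * np.exp(-((t - 0.4) ** 2) / 0.01)

    pick = int(trace.argmax())                   # a "max"-type pick on the wavelet peak
    phase = np.unwrap(np.angle(hilbert(trace)))  # instantaneous phase, roughly monotonic here

    shift = 1.5 * np.pi                          # 3/4 of a period back along the wavelet
    target = phase[pick] - shift
    shifted_pick = int(np.abs(phase - target).argmin())
    assert shifted_pick < pick                   # the new pick lands earlier on the trace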
From 6a18b483592f41197ee50a605202f734b6be146a Mon Sep 17 00:00:00 2001
From: nikita
Date: Fri, 20 Sep 2019 15:49:01 +0300
Subject: [PATCH 03/45] modify shift action postprocessing

---
 seismicpro/src/seismic_batch.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py
index 71a0ee5d9..0344ce145 100644
--- a/seismicpro/src/seismic_batch.py
+++ b/seismicpro/src/seismic_batch.py
@@ -1257,7 +1257,7 @@ def equalize(self, index, src, dst, params, survey_id_col=None):

     @action
     @inbatch_parallel(init='_init_component', target="threads")
-    def shift_pick(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, thd=0.2):
+    def shift_pick(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, thd=0.05):
         """ Shifts picking time by the given phase"""
         pos = self.get_pos(None, src, index)
         pick = getattr(self, src)[pos]
@@ -1269,8 +1269,10 @@ def shift_pick(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, thd=0

         shifted_phase = phase[pick] - shift
         phase_mod = np.abs(phase - shifted_phase)
-        raw_zero = phase_mod.argmin()
-        zero = np.where((np.abs(phase_mod - phase_mod[raw_zero])) < thd)[0][-1]
+        zero = phase_mod.argmin()
+
+        n_skip = (np.abs(trace[zero:]) > thd).argmax() - 1
+        zero += n_skip

         getattr(self, dst)[pos] = zero
         return self

From 1ad146d94ad8241455571c6b4e56c4f0f39e960f Mon Sep 17 00:00:00 2001
From: nikita
Date: Fri, 20 Sep 2019 16:49:31 +0300
Subject: [PATCH 04/45] squeezing trace

---
 seismicpro/src/seismic_batch.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py
index 0344ce145..9ef5df90f 100644
--- a/seismicpro/src/seismic_batch.py
+++ b/seismicpro/src/seismic_batch.py
@@ -1262,13 +1262,13 @@ def shift_pick(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, thd=0
         pos = self.get_pos(None, src,
index) pick = getattr(self, src)[pos] trace = getattr(self, src_raw)[pos] + trace = np.squeeze(trace) analytic = hilbert(trace) phase = np.unwrap(np.angle(analytic)) - phase = np.squeeze(phase) - shifted_phase = phase[pick] - shift - phase_mod = np.abs(phase - shifted_phase) + phase_diff = phase[pick] - shift + phase_mod = np.abs(phase - phase_diff) zero = phase_mod.argmin() n_skip = (np.abs(trace[zero:]) > thd).argmax() - 1 From 84a2462b6b7676c377d99b90365d1f1c9f88ceb3 Mon Sep 17 00:00:00 2001 From: nikita Date: Wed, 25 Sep 2019 12:44:39 +0300 Subject: [PATCH 05/45] dump is compatible with knn index --- seismicpro/src/seismic_batch.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 9ef5df90f..35b020a1d 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -10,7 +10,7 @@ from ..batchflow import action, inbatch_parallel, Batch, any_action_failed -from .seismic_index import SegyFilesIndex, FieldIndex +from .seismic_index import SegyFilesIndex, FieldIndex, KNNIndex from .utils import (FILE_DEPENDEND_COLUMNS, partialmethod, calculate_sdc_for_field, massive_block, check_unique_fieldrecord_across_surveys) @@ -483,6 +483,10 @@ def _dump_picking(self, src, path, traces, to_samples, columns=None, max_len=(6, df = df.sort_values(by=sort_by) df = df.loc[self.indices] + + if isinstance(self.index, KNNIndex): + df = df.iloc[::5, :] + df['timeOffset'] = data.astype(int) df = df.reset_index(drop=self.index.name is None)[columns] df.columns = df.columns.droplevel(1) From e866c2534bcdb2542b274e07cdf8f363fce08fb3 Mon Sep 17 00:00:00 2001 From: nikita Date: Wed, 25 Sep 2019 16:40:31 +0300 Subject: [PATCH 06/45] shift now compatible with knn index --- seismicpro/src/seismic_batch.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 35b020a1d..56c1ca623 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -1266,6 +1266,10 @@ def shift_pick(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, thd=0 pos = self.get_pos(None, src, index) pick = getattr(self, src)[pos] trace = getattr(self, src_raw)[pos] + + if isinstance(self.index, KNNIndex): + trace = trace[0] + trace = np.squeeze(trace) analytic = hilbert(trace) From 82e70860f9cdcb2885822320a96e965cc8736b20 Mon Sep 17 00:00:00 2001 From: nikita Date: Sun, 6 Oct 2019 20:26:08 +0300 Subject: [PATCH 07/45] manage raising knn index error --- seismicpro/src/seismic_index.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/seismicpro/src/seismic_index.py b/seismicpro/src/seismic_index.py index 56010ae08..cd14759ec 100644 --- a/seismicpro/src/seismic_index.py +++ b/seismicpro/src/seismic_index.py @@ -299,7 +299,8 @@ class KNNIndex(TraceIndex): traces. Columns include FieldRecord, TraceNumber, TRACE_SEQUENCE_FILE, file_id and a number of extra_headers if specified. """ - def __init__(self, *args, **kwargs): + def __init__(self, *args, raise_warning=False, **kwargs): + self.raise_warning = raise_warning kwargs['index_name'] = 'KNN' super().__init__(*args, **kwargs) @@ -318,7 +319,8 @@ def build_df(self, n_neighbors, **kwargs): nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree') _, indices = nbrs.fit(data).kneighbors(data) if not np.all(indices[:, 0] == np.arange(len(data))): - raise ValueError("Faild to build KNNIndex. 
Duplicated CDP.") + if self.raise_warning: + raise ValueError("Faild to build KNNIndex. Duplicated CDP.") dfs.append(df.iloc[np.hstack(indices)]) df = pd.concat(dfs).reset_index(drop=True) From 3dc715dc405fd14704b40396cb18699569ad6abe Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Fri, 15 Nov 2019 14:36:28 +0300 Subject: [PATCH 08/45] Update dropping zero traces --- seismicpro/src/seismic_batch.py | 51 ++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index a620d1eb2..b0ad9368b 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -16,6 +16,7 @@ from .file_utils import write_segy_file from .plot_utils import IndexTracker, spectrum_plot, seismic_plot, statistics_plot, gain_plot +INDEX_UID = 'TRACE_SEQUENCE_FILE' PICKS_FILE_HEADERS = ['FieldRecord', 'TraceNumber', 'timeOffset'] @@ -175,30 +176,26 @@ def _post_filter_by_mask(self, mask, *args, **kwargs): Parameters ---------- mask : list - List of masks if ``src`` is ``str`` - or list of lists if ``src`` is list. + list of arrays bool arrays Returns ------- : SeismicBatch - New batch class of filtered components. + New batch with filtered components and new index. Note ---- All components will be changed with given mask and during the proccess, - new SeismicBatch instance will be created. + new SeismicBatch instance will be created with new index. """ + _ = args, kwargs if any_action_failed(mask): all_errors = [error for error in mask if isinstance(error, Exception)] print(all_errors) raise ValueError(all_errors) - _ = args - src = kwargs.get('src', None) - src = (src, ) if isinstance(src, str) else src - - mask = np.concatenate((np.array(mask))) - new_idf = self.index.get_df(index=np.hstack((mask)), reset=False) + mask = np.concatenate(mask) + new_idf = self.index.get_df(index=mask, reset=False) new_index = new_idf.index.unique() batch_index = type(self.index).from_index(index=new_index, idf=new_idf, @@ -211,11 +208,11 @@ def _post_filter_by_mask(self, mask, *args, **kwargs): for comp in batch.components: setattr(batch, comp, np.array([None] * len(batch.index))) - for i, index in enumerate(new_index): + for index in new_index: for isrc in batch.components: - pos = self.get_pos(None, isrc, index) - new_data = getattr(self, isrc)[pos][mask[pos]] - getattr(batch, isrc)[i] = new_data + pos_batch = batch.get_pos(None, isrc, index) + pos_self = self.get_pos(None, isrc, index) + getattr(batch, isrc)[pos_batch] = getattr(self, isrc)[pos_self] return batch def trace_headers(self, header, flatten=False): @@ -569,7 +566,7 @@ def _load_from_segy_file(self, index, *args, src, dst, tslice=None): _ = src, args pos = self.get_pos(None, "indices", index) path = index - trace_seq = self.index.get_df([index])[('TRACE_SEQUENCE_FILE', src)] + trace_seq = self.index.get_df([index])[(INDEX_UID, src)] if tslice is None: tslice = slice(None) @@ -678,7 +675,6 @@ def sort_traces(self, index, *args, src, sort_by, dst=None): @action @inbatch_parallel(init="indices", post='_post_filter_by_mask', target="threads") - @apply_to_each_component def drop_zero_traces(self, index, src, num_zero, **kwargs): """Drop traces with sequence of zeros longer than ```num_zero```. @@ -695,13 +691,28 @@ def drop_zero_traces(self, index, src, num_zero, **kwargs): Batch without dropped traces. 
""" _ = kwargs + sorting = self.meta[src]['sorting'] + if sorting is None: + raise ValueError('traces in `{}` component should be sorted ' + 'before dropping zero traces'.format(src)) + pos = self.get_pos(None, src, index) traces = getattr(self, src)[pos] mask = list() - for _, trace in enumerate(traces != 0): - diff_zeros = np.diff(np.append(np.where(trace)[0], len(trace))) - mask.append(False if len(diff_zeros) == 0 else np.max(diff_zeros) < num_zero) - return mask + for trace in traces: + nonzero_indices = np.nonzero(trace)[0] + # add -1 and len(trace) indices to count leading and trailing zero sequences + nonzero_indices = np.concatenate(([-1], nonzero_indices, [len(trace)])) + zero_seqs = np.diff(nonzero_indices) - 1 + mask.append(np.max(zero_seqs) < num_zero) + mask = np.array(mask) + + for isrc in self.components: + getattr(self, isrc)[pos] = getattr(self, isrc)[pos][mask] + + sorted_index_df = self.index.get_df(index)[(INDEX_UID, sorting)].sort_values(sorting) + order = np.argsort(sorted_index_df[INDEX_UID].values) + return mask[order] @action @inbatch_parallel(init='_init_component') From 679196d3807111c9f64ebf03f3aa498791c4ea65 Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Fri, 15 Nov 2019 16:10:23 +0300 Subject: [PATCH 09/45] Fix sorting; update zero trace drop Now sort action takes current sorting into account. drop_zero_traces now raises errors if src has no sorting or if other components in batch are sorted differently. --- seismicpro/src/seismic_batch.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index b0ad9368b..686ef1a6d 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -664,10 +664,23 @@ def sort_traces(self, index, *args, src, sort_by, dst=None): Batch with new trace sorting. 
""" _ = args + sorting = self.meta[dst]['sorting'] + pos = self.get_pos(None, src, index) df = self.index.get_df([index]) - order = np.argsort(df[sort_by].tolist()) + + if sorting: + if sorting == sort_by: + return self + + cols = [sorting, sort_by] + sorted_index_df = df[cols].sort_values(sorting) + order = np.argsort(sorted_index_df[sort_by].values) + else: + order = np.argsort(df[sort_by].tolist()) + getattr(self, dst)[pos] = getattr(self, src)[pos][order] + if pos == 0: self.meta[dst]['sorting'] = sort_by @@ -696,6 +709,10 @@ def drop_zero_traces(self, index, src, num_zero, **kwargs): raise ValueError('traces in `{}` component should be sorted ' 'before dropping zero traces'.format(src)) + has_same_sorting = all([self.meta[comp]['sorting'] == sorting for comp in self.components]) + if not has_same_sorting: + raise ValueError('all components in batch should have same sorting') + pos = self.get_pos(None, src, index) traces = getattr(self, src)[pos] mask = list() @@ -707,11 +724,12 @@ def drop_zero_traces(self, index, src, num_zero, **kwargs): mask.append(np.max(zero_seqs) < num_zero) mask = np.array(mask) - for isrc in self.components: - getattr(self, isrc)[pos] = getattr(self, isrc)[pos][mask] + for comp in self.components: + getattr(self, comp)[pos] = getattr(self, comp)[pos][mask] - sorted_index_df = self.index.get_df(index)[(INDEX_UID, sorting)].sort_values(sorting) - order = np.argsort(sorted_index_df[INDEX_UID].values) + cols = [(INDEX_UID, src), (sorting, '')] + sorted_index_df = self.index.get_df(index)[cols].sort_values(sorting) + order = np.argsort(sorted_index_df[cols[0]].values) return mask[order] @action From 5af360eb344585534d89ba112e9073c21565bbab Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Fri, 15 Nov 2019 16:53:41 +0300 Subject: [PATCH 10/45] Add docstrings --- seismicpro/src/seismic_batch.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 686ef1a6d..c4d2cc36c 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -185,8 +185,9 @@ def _post_filter_by_mask(self, mask, *args, **kwargs): Note ---- - All components will be changed with given mask and during the proccess, - new SeismicBatch instance will be created with new index. + All batch items in each component should be filtered in decorated action. + This post function created new instance of SeismicBatch with new index + instance. """ _ = args, kwargs if any_action_failed(mask): @@ -691,6 +692,9 @@ def sort_traces(self, index, *args, src, sort_by, dst=None): def drop_zero_traces(self, index, src, num_zero, **kwargs): """Drop traces with sequence of zeros longer than ```num_zero```. + This action drops traces from index instance and from all components + in batch according to the mask obtined calculated on `src` component. + Parameters ---------- num_zero : int @@ -702,6 +706,16 @@ def drop_zero_traces(self, index, src, num_zero, **kwargs): ------- : SeismicBatch Batch without dropped traces. + + Raises + ------ + ValueError : if `src` has no sorting + ValueError : if any component in batch has sorting different from `src` + + Note + ---- + This action creates new instance of SeismicBatch with new index + instance. 
""" _ = kwargs sorting = self.meta[src]['sorting'] From 32b86d52a1f50766a3a24633b25b5598062fd4ad Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Fri, 15 Nov 2019 17:00:10 +0300 Subject: [PATCH 11/45] Update docstrings --- seismicpro/src/seismic_batch.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index c4d2cc36c..b353d1bc7 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -176,7 +176,7 @@ def _post_filter_by_mask(self, mask, *args, **kwargs): Parameters ---------- mask : list - list of arrays bool arrays + list of boolean arrays Returns ------- @@ -185,8 +185,8 @@ def _post_filter_by_mask(self, mask, *args, **kwargs): Note ---- - All batch items in each component should be filtered in decorated action. - This post function created new instance of SeismicBatch with new index + Batch items in each component should be filtered in decorated action. + This post function creates new instance of SeismicBatch with new index instance. """ _ = args, kwargs @@ -692,8 +692,8 @@ def sort_traces(self, index, *args, src, sort_by, dst=None): def drop_zero_traces(self, index, src, num_zero, **kwargs): """Drop traces with sequence of zeros longer than ```num_zero```. - This action drops traces from index instance and from all components - in batch according to the mask obtined calculated on `src` component. + This action drops traces from index dataframe and from all batch components + according to the mask calculated on `src` component. Parameters ---------- From e41c776878c659983f6ec65797d2050bfff2b704 Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Fri, 15 Nov 2019 18:02:49 +0300 Subject: [PATCH 12/45] Allow to drop zero traces without sorting for TraceIndex --- seismicpro/src/seismic_batch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index b353d1bc7..706bc7c3f 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -719,7 +719,7 @@ def drop_zero_traces(self, index, src, num_zero, **kwargs): """ _ = kwargs sorting = self.meta[src]['sorting'] - if sorting is None: + if sorting is None and isinstance(self.index, FieldIndex): raise ValueError('traces in `{}` component should be sorted ' 'before dropping zero traces'.format(src)) From 5df2d206f01b0f439ed165d35e0906428f2cc4da Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Fri, 15 Nov 2019 18:04:30 +0300 Subject: [PATCH 13/45] Update docstring --- seismicpro/src/seismic_batch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 706bc7c3f..48316b654 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -709,7 +709,7 @@ def drop_zero_traces(self, index, src, num_zero, **kwargs): Raises ------ - ValueError : if `src` has no sorting + ValueError : if `src` has no sorting and batch index is FieldIndex ValueError : if any component in batch has sorting different from `src` Note From 978097470c4ab135eed9705d7730c0404956edc4 Mon Sep 17 00:00:00 2001 From: nikita Date: Wed, 20 Nov 2019 16:18:50 +0300 Subject: [PATCH 14/45] added transform picking dump style func --- seismicpro/src/utils.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/seismicpro/src/utils.py b/seismicpro/src/utils.py index 6df1333de..236c422ef 100644 --- a/seismicpro/src/utils.py +++ 
b/seismicpro/src/utils.py
@@ -802,3 +802,20 @@ def check_unique_fieldrecord_across_surveys(surveys_by_fieldrecord, index):
     """
     if len(surveys_by_fieldrecord) != 1:
         raise ValueError('Field {} represents data from more than one survey!'.format(index))
+
+def transform_pickingstyle_polytech(path, max_len=(6, 4)):
+    """ Transforms the format of the csv file with dumped picking to the format acceptable by polytech server.
+    That means that all columns, no matter how many digits they contain, should be separated by 8 spaces.
+    Parameters
+    ----------
+    path : str
+        Path to the file with picking.
+    max_len : tuple, default is (6, 4)
+        The number of maximum digits each columns except last contains
+    """
+    df = pd.read_csv(path)
+    with open(path + 'dump', 'w') as f:
+        for row in df.iterrows():
+            for i, item in enumerate(row[1][:-1]):
+                f.write(str(item).ljust(max_len[i] + 8))
+            f.write(str(row[1][-1]) + '\n')

From a5861142710fb20fea2769044b9682830909526d Mon Sep 17 00:00:00 2001
From: nikita
Date: Wed, 20 Nov 2019 16:26:41 +0300
Subject: [PATCH 15/45] update shift_pick action

---
 seismicpro/src/seismic_batch.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py
index 56c1ca623..29dfad747 100644
--- a/seismicpro/src/seismic_batch.py
+++ b/seismicpro/src/seismic_batch.py
@@ -1266,21 +1266,18 @@ def shift_pick(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, thd=0
         pos = self.get_pos(None, src, index)
         pick = getattr(self, src)[pos]
         trace = getattr(self, src_raw)[pos]
-
         if isinstance(self.index, KNNIndex):
             trace = trace[0]
-
         trace = np.squeeze(trace)

         analytic = hilbert(trace)
         phase = np.unwrap(np.angle(analytic))

-        phase_diff = phase[pick] - shift
-        phase_mod = np.abs(phase - phase_diff)
-        zero = phase_mod.argmin()
-
-        n_skip = max((np.abs(trace[zero:]) > thd).argmax() - 1, 0)
+        phase_diff = phase[pick] - shift
+        phase_mod = phase - phase_diff
+        phase_mod[phase_mod < 0] = 0
+        zero = len(phase_mod) - phase_mod[::-1].argmin() - 1
+        n_skip = max((np.abs(trace[zero:]) > thd).argmax() - 1, 0)
         zero += n_skip
-        getattr(self, dst)[pos] = zero
+        getattr(self, dst)[pos] = zero
         return self

From bdb3d70ad1fa6aeedf5409fe235a556f48412242 Mon Sep 17 00:00:00 2001
From: nikita
Date: Wed, 20 Nov 2019 16:35:28 +0300
Subject: [PATCH 16/45] update seismic_batch

---
 seismicpro/src/seismic_batch.py | 24 +++++++++---------------
 1 file changed, 9 insertions(+), 15 deletions(-)

diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py
index 98f9fa79b..49b5dced2 100644
--- a/seismicpro/src/seismic_batch.py
+++ b/seismicpro/src/seismic_batch.py
@@ -18,7 +18,7 @@
 from .plot_utils import IndexTracker, spectrum_plot, seismic_plot, statistics_plot, gain_plot

-PICKS_FILE_HEADERS = ['FieldRecord', 'TraceNumber', 'timeOffset']
+PICKS_FILE_HEADERS = ['FieldRecord', 'TraceNumber', 'FIRST_BREAK_TIME']

 ACTIONS_DICT = {
@@ -449,7 +449,7 @@ def _dump_single_segy(self, src, path):
         return self

     @action
-    def _dump_picking(self, src, path, traces, to_samples, columns=None, max_len=(6, 4)):
+    def _dump_picking(self, src, path, traces='raw', to_samples=True, columns=None):
         """Dump picking to file.

         Parameters
@@ -458,9 +458,9 @@ def _dump_picking(self, src, path, traces, to_samples, columns=None, max_len=(6,
             Source to get picking from.
         path : str
             Output file path.
-        traces : str
+        traces : str, default 'raw'
             Batch component with corresponding traces.
-        to_samples : bool
+        to_samples : bool, default True
             Should be picks converted to time samples.
         columns: array_like, optional
             Columns to include in the output file. See PICKS_FILE_HEADERS
             for default format.
@@ -487,15 +487,14 @@ def _dump_picking(self, src, path, traces='raw', to_samples=True, columns=None):

         if isinstance(self.index, KNNIndex):
             df = df.iloc[::5, :]

-        df['timeOffset'] = data.astype(int)
+        df[PICKS_FILE_HEADERS[-1]] = data.astype(int)
         df = df.reset_index(drop=self.index.name is None)[columns]
         df.columns = df.columns.droplevel(1)

-        with open(path, 'a') as f:
-            for row in df.iterrows():
-                for i, item in enumerate(row[1][:-1]):
-                    f.write(str(item).ljust(max_len[i] + 8))
-                f.write(str(row[1][-1]) + '\n')
+        if not os.path.isfile(path):
+            df.to_csv(path, index=False, header=True, mode='a')
+        else:
+            df.to_csv(path, index=False, header=None, mode='a')
         return self

     @action
@@ -1129,13 +1128,8 @@ def mask_to_pick(self, src, dst, labels=True):
         if not labels:
             data = np.argmax(data, axis=1)

-<<<<<<< HEAD
-        dst_data = massive_block(np.stack(data))
-        setattr(self, dst, np.array([i for i in dst_data] + [None])[:-1])
-=======
         dst_data = massive_block(data)
         setattr(self, dst, np.array(dst_data + [None])[:-1]) # array implicitly converted to object dtype
->>>>>>> master
         return self

     @action

From 4a084545ab8be4f05fb3d5172e16ea47c50417e4 Mon Sep 17 00:00:00 2001
From: nikita
Date: Thu, 21 Nov 2019 14:12:40 +0300
Subject: [PATCH 17/45] update docs

---
 seismicpro/src/utils.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/seismicpro/src/utils.py b/seismicpro/src/utils.py
index 236c422ef..b0ea9c2c6 100644
--- a/seismicpro/src/utils.py
+++ b/seismicpro/src/utils.py
@@ -806,12 +806,15 @@ def check_unique_fieldrecord_across_surveys(surveys_by_fieldrecord, index):
 def transform_pickingstyle_polytech(path, max_len=(6, 4)):
     """ Transforms the format of the csv file with dumped picking to the format acceptable by polytech server.
     That means that all columns, no matter how many digits they contain, should be separated by 8 spaces.
-    Parameters
+    Most of the time 2 columns 'FieldRecord' and 'TraceNumber' contain 6 and 4 digits respectively, however,
+    it may vary from segy to segy.
+
+    Parameters
     ----------
     path : str
         Path to the file with picking.
     max_len : tuple, default is (6, 4)
-        The number of maximum digits each columns except last contains
+        The number of maximum digits each column except last contains
     """

From 2dd9244b36a847f5663b1736411082fe707742d8 Mon Sep 17 00:00:00 2001
From: nikita
Date: Thu, 21 Nov 2019 14:21:15 +0300
Subject: [PATCH 18/45] update knn constructor docs

---
 seismicpro/src/seismic_index.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/seismicpro/src/seismic_index.py b/seismicpro/src/seismic_index.py
index cd14759ec..8efc8dc5c 100644
--- a/seismicpro/src/seismic_index.py
+++ b/seismicpro/src/seismic_index.py
@@ -285,6 +285,8 @@ class KNNIndex(TraceIndex):
     ----------
     n_neighbors : int
         Group size parameter.
+    raise_error: bool
+        Whether to raise an error when 2 receivers with the same coordinates are found on the same shot, default False
     kwargs : dict
         Named arguments for ```batchflow.FilesIndex````.

@@ -299,8 +301,8 @@ class KNNIndex(TraceIndex):
     traces. Columns include FieldRecord, TraceNumber, TRACE_SEQUENCE_FILE,
     file_id and a number of extra_headers if specified.
     """
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args, raise_error=False, **kwargs):
+        self.raise_error = raise_error
         kwargs['index_name'] = 'KNN'
         super().__init__(*args, **kwargs)

@@ -319,7 +321,7 @@ def build_df(self, n_neighbors, **kwargs):
         nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree')
         _, indices = nbrs.fit(data).kneighbors(data)
         if not np.all(indices[:, 0] == np.arange(len(data))):
-            raise ValueError("Faild to build KNNIndex. Duplicated CDP.")
+            if self.raise_error:
+                raise ValueError("Faild to build KNNIndex. Duplicated CDP.")

         dfs.append(df.iloc[np.hstack(indices)])
     df = pd.concat(dfs).reset_index(drop=True)
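The `indices[:, 0]` test that `raise_error` guards is easy to reproduce. Below is a hedged sketch with made-up coordinates (not library code): when two receivers share a position, a point's own row is no longer guaranteed to come back as its first neighbour.

    import numpy as np
    from sklearn.neighbors import NearestNeighbors

    coords = np.array([[0.0, 0.0], [0.0, 0.0], [1.0, 1.0]])  # first two points duplicated
    nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(coords)
    _, indices = nbrs.kneighbors(coords)
    # with duplicated CDP coordinates this check may fail, which KNNIndex reports
    print(np.all(indices[:, 0] == np.arange(len(coords))))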
""" - def __init__(self, *args, raise_warning=False, **kwargs): - self.raise_warning = raise_warning + def __init__(self, *args, raise_error=False, **kwargs): + self.raise_error = raise_error kwargs['index_name'] = 'KNN' super().__init__(*args, **kwargs) @@ -319,7 +321,7 @@ def build_df(self, n_neighbors, **kwargs): nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree') _, indices = nbrs.fit(data).kneighbors(data) if not np.all(indices[:, 0] == np.arange(len(data))): - if self.raise_warning: + if self.raise_error: raise ValueError("Faild to build KNNIndex. Duplicated CDP.") dfs.append(df.iloc[np.hstack(indices)]) From 88c62c4a1c6fbef74340f4a9e580339c84ebbdb9 Mon Sep 17 00:00:00 2001 From: nikita Date: Thu, 21 Nov 2019 14:55:12 +0300 Subject: [PATCH 19/45] update shift pick action --- seismicpro/src/seismic_batch.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 49b5dced2..7701ee387 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -1263,7 +1263,24 @@ def equalize(self, index, src, dst, params, survey_id_col=None): @action @inbatch_parallel(init='_init_component', target="threads") def shift_pick(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, thd=0.05): - """ Shifts picking time on given phase""" + """ Shifts picking time stored in `src` component on the given phase along the traces stored in `src_raw`. + + Parameters + ---------- + src : str + The batch components to get the data from. + dst : str + The batch components to put the result in. + src_raw: str + The batch components where the traces are stored, default 'raw' + shift: float + The amount of phase to shift, default is 1.5 * np.pi which corresponds to transfering picking times + from 'max' to 'zero' type. + thd: float + Threshold determining how many trace samples with low amplitudes, less then thd, can be skipped. + Introduced because of the unstable behaviour of the hilbert transform at the begining of the signal. + + """ pos = self.get_pos(None, src, index) pick = getattr(self, src)[pos] trace = getattr(self, src_raw)[pos] @@ -1273,11 +1290,11 @@ def shift_pick(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, thd=0 analytic = hilbert(trace) phase = np.unwrap(np.angle(analytic)) - - phase_diff = phase[pick] - shift - phase_mod = phase - phase_diff + # finding x such that phase[x] = phase[pick] - shift + phase_mod = phase - phase[pick] + shift phase_mod[phase_mod < 0] = 0 - zero = len(phase_mod) - phase_mod[::-1].argmin() - 1 + zero = len(phase_mod) - phase_mod[::-1].argmin() - 1 # in case phase_mod reaches 0 find the index of last one + # skip the trace samples with amplitudes < thd, starting from the `zero` sample n_skip = max((np.abs(trace[zero:]) > thd).argmax() - 1, 0) zero += n_skip getattr(self, dst)[pos] = zero From 14966ecb51a9e8ca76762410331303fcb380233f Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Fri, 22 Nov 2019 12:38:42 +0300 Subject: [PATCH 20/45] Update logic for TraceIndex in drop_zero_traces --- seismicpro/src/seismic_batch.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 48316b654..b7cdc5d12 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -665,7 +665,7 @@ def sort_traces(self, index, *args, src, sort_by, dst=None): Batch with new trace sorting. 
""" _ = args - sorting = self.meta[dst]['sorting'] + sorting = self.meta[src]['sorting'] pos = self.get_pos(None, src, index) df = self.index.get_df([index]) @@ -678,7 +678,7 @@ def sort_traces(self, index, *args, src, sort_by, dst=None): sorted_index_df = df[cols].sort_values(sorting) order = np.argsort(sorted_index_df[sort_by].values) else: - order = np.argsort(df[sort_by].tolist()) + order = np.argsort(df[sort_by].values) getattr(self, dst)[pos] = getattr(self, src)[pos][order] @@ -741,10 +741,12 @@ def drop_zero_traces(self, index, src, num_zero, **kwargs): for comp in self.components: getattr(self, comp)[pos] = getattr(self, comp)[pos][mask] - cols = [(INDEX_UID, src), (sorting, '')] - sorted_index_df = self.index.get_df(index)[cols].sort_values(sorting) - order = np.argsort(sorted_index_df[cols[0]].values) - return mask[order] + if sorting: + cols = [(INDEX_UID, src), (sorting, '')] + sorted_index_df = self.index.get_df([index])[cols].sort_values(sorting) + order = np.argsort(sorted_index_df[cols[0]].values) + return mask[order] + return mask @action @inbatch_parallel(init='_init_component') From d25346eb5dfa17f43cb77af9d85cb28295623259 Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Mon, 25 Nov 2019 12:15:17 +0300 Subject: [PATCH 21/45] Handle sorting case when component does not have meta --- seismicpro/src/seismic_batch.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index b7cdc5d12..2ce7bda45 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -665,7 +665,10 @@ def sort_traces(self, index, *args, src, sort_by, dst=None): Batch with new trace sorting. """ _ = args - sorting = self.meta[src]['sorting'] + if src in self.meta.keys(): + sorting = self.meta[src].get('sorting') + else: + sorting = None pos = self.get_pos(None, src, index) df = self.index.get_df([index]) From 81054b6c641326c646032bde60e9f1b3cb1ae180 Mon Sep 17 00:00:00 2001 From: user Date: Mon, 25 Nov 2019 16:05:43 +0300 Subject: [PATCH 22/45] update transform picking function --- seismicpro/src/utils.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/seismicpro/src/utils.py b/seismicpro/src/utils.py index b0ea9c2c6..f593062b6 100644 --- a/seismicpro/src/utils.py +++ b/seismicpro/src/utils.py @@ -803,16 +803,17 @@ def check_unique_fieldrecord_across_surveys(surveys_by_fieldrecord, index): if len(surveys_by_fieldrecord) != 1: raise ValueError('Field {} represents data from more than one survey!'.format(index)) -def transform_pickingstyle_polytech(path, max_len=(6, 4)): - """ Transforms the format of the csv file with dumped picking to the format acceptible by polytech server. - That means that all columns, no matter how many digits it contains, should be separated by 8 spaces. - Most of the time 2 columns 'FieldRecord' and 'TraceNumber' contains of 6 and 4 digits respectively, however, - it may vary from segy to segy. +def transform_to_fixed_width_columns(path, n_spaces=8, max_len=(6, 4)): + """ Transforms the format of csv file with dumped picking so all the columns are separated by `n_spaces` spaces. + Most of the time columns 'FieldRecord' and 'TraceNumber' contains of 6 and 4 digits respectively, + however, it may vary. Such transform makes it compatible with specific seismic processing software. Parameters ---------- path : str Path to the file with picking. + n_spaces : int, default is 8 + The number of spaces separating columns. 
From 7090fb08846e78c010be78ce8eb967574975697f Mon Sep 17 00:00:00 2001
From: user
Date: Mon, 25 Nov 2019 16:43:27 +0300
Subject: [PATCH 23/45] update docs for inference script

---
 docker_containers/picking_docker/picking_inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker_containers/picking_docker/picking_inference.py b/docker_containers/picking_docker/picking_inference.py
index 50bcd6a39..c62e9d911 100644
--- a/docker_containers/picking_docker/picking_inference.py
+++ b/docker_containers/picking_docker/picking_inference.py
@@ -63,7 +63,7 @@ def predict(path_raw, path_model, num_zero, save_to, batch_size, trace_len, devi
     device: str or torch.device, default: 'cpu'
         The device used for inference. Can be 'gpu' in case of available GPU.
     shift: float, default: 0
-        Picking time correction for the given shift.
+        Alter the picking times for each trace on the given phase shift. Multiplied by `pi`.
     """
     data = SeismicDataset(TraceIndex(name='raw', path=path_raw))

From f00e4289d214ad8fbf9876c0b1669ac14b4524c8 Mon Sep 17 00:00:00 2001
From: user
Date: Wed, 27 Nov 2019 16:19:40 +0300
Subject: [PATCH 24/45] add default values for predict function

---
 docker_containers/picking_docker/picking_inference.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker_containers/picking_docker/picking_inference.py b/docker_containers/picking_docker/picking_inference.py
index c62e9d911..7f87d3957 100644
--- a/docker_containers/picking_docker/picking_inference.py
+++ b/docker_containers/picking_docker/picking_inference.py
@@ -28,7 +28,7 @@ def make_prediction():
     parser.add_argument('-bs', '--batch_size', type=int, help="The number of traces in \
                         the batch for inference stage.", default=1000)
     parser.add_argument('-ts', '--trace_len', type=int, help="The number of first samples \
-                        of the trace to load.", default=751)
+                        of the trace to load.", default=1000)
     parser.add_argument('-dvc', '--device', type=str, help="The device for \
                         inference. Can be 'cpu' or 'gpu'.", default='cpu')
     parser.add_argument('-s', '--shift', type=float, help="Picking time phase shift", default=0)
@@ -43,7 +43,7 @@ def make_prediction():
     shift = args.shift
     predict(path_raw, model, num_zero, save_to, batch_size, trace_len, device, shift)

-def predict(path_raw, path_model, num_zero, save_to, batch_size, trace_len, device, shift):
+def predict(path_raw, path_model, num_zero=100, save_to='dump.csv', batch_size=1000, trace_len=1000, device='cpu', shift=0):
     """Make predictions and dump results using loaded model and path to data.
     Parameters

From 6c936965b0d8a2f4e06a17a9bdd26de7f401a724 Mon Sep 17 00:00:00 2001
From: user
Date: Wed, 27 Nov 2019 19:25:39 +0300
Subject: [PATCH 25/45] modify transform picking style function

---
 seismicpro/src/utils.py | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/seismicpro/src/utils.py b/seismicpro/src/utils.py
index f593062b6..7aa5241d2 100644
--- a/seismicpro/src/utils.py
+++ b/seismicpro/src/utils.py
@@ -1,5 +1,9 @@
 """ Seismic batch tools """
+import csv
+import shutil
+import tempfile
 import functools
+
 import numpy as np
 import pandas as pd
 from sklearn.linear_model import LinearRegression
@@ -803,23 +807,29 @@ def check_unique_fieldrecord_across_surveys(surveys_by_fieldrecord, index):
     if len(surveys_by_fieldrecord) != 1:
         raise ValueError('Field {} represents data from more than one survey!'.format(index))

-def transform_to_fixed_width_columns(path, n_spaces=8, max_len=(6, 4)):
+
+def transform_to_fixed_width_columns(path, path_save=None, n_spaces=8, max_len=(6, 4)):
     """ Transforms the format of csv file with dumped picking so all the columns are separated by `n_spaces` spaces.
+    Such transform makes it compatible with specific seismic processing software.
     Most of the time columns 'FieldRecord' and 'TraceNumber' contain 6 and 4 digits respectively,
-    however, it may vary. Such transform makes it compatible with specific seismic processing software.
+    however, it may vary.

     Parameters
     ----------
     path : str
         Path to the file with picking.
+    path_save : str, optional
+        Path where the result will be stored. By default the file will be overwritten.
     n_spaces : int, default is 8
         The number of spaces separating columns.
     max_len : tuple, default is (6, 4)
         The number of maximum digits each column except last contains
     """
-    df = pd.read_csv(path)
-    with open(path + 'dump', 'w') as f:
-        for row in df.iterrows():
-            for i, item in enumerate(row[1][:-1]):
-                f.write(str(item).ljust(max_len[i] + n_spaces))
-            f.write(str(row[1][-1]) + '\n')
+    with open(path, 'r', newline='') as read_file:
+        reader = csv.reader(read_file)
+        with tempfile.NamedTemporaryFile(mode='w') as write_file:
+            for row in reader:
+                for i, item in enumerate(row[:-1]):
+                    write_file.write(str(item).ljust(max_len[i] + n_spaces))
+                write_file.write(str(row[-1]) + '\n')
+            shutil.copyfile(write_file.name, path_save or path)

From 41506f7c7f9ab8ea43fff6c931b9bb7bd4ce6456 Mon Sep 17 00:00:00 2001
From: user
Date: Wed, 27 Nov 2019 21:01:56 +0300
Subject: [PATCH 26/45] add comments for transform pickingstyle func

---
 seismicpro/src/utils.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/seismicpro/src/utils.py b/seismicpro/src/utils.py
index 7aa5241d2..f16f46b46 100644
--- a/seismicpro/src/utils.py
+++ b/seismicpro/src/utils.py
@@ -825,11 +825,22 @@ def transform_to_fixed_width_columns(path, path_save=None, n_spaces=8, max_len=(
     max_len : tuple, default is (6, 4)
         The number of maximum digits each column except last contains
     """
+    if path_save is not None:
+        write_object = open(path_save, 'w')
+    # in case you want to overwrite the existing file, a temporary file is created:
+    # the intermediate results are saved to this temp file and, once writing is done,
+    # the original file is replaced with a copy of it
+    else:
+        write_object = tempfile.NamedTemporaryFile(mode='w', delete=False)
+
     with open(path, 'r', newline='') as read_file:
         reader = csv.reader(read_file)
-        with tempfile.NamedTemporaryFile(mode='w') as write_file:
+        with write_object as write_file:
             for row in reader:
                 for i, item in enumerate(row[:-1]):
                     write_file.write(str(item).ljust(max_len[i] + n_spaces))
                 write_file.write(str(row[-1]) + '\n')
-            shutil.copyfile(write_file.name, path_save or path)
+
+    if path_save:
+        return
+    shutil.copyfile(write_file.name, path)
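The overwrite branch only works if the temporary file survives the `with` block, hence `delete=False` above. A self-contained sketch of the same pattern (hypothetical helper, not library code):

    import shutil
    import tempfile

    def rewrite_in_place(path, transform):
        """Apply `transform` to every line of `path` and overwrite the file."""
        with tempfile.NamedTemporaryFile(mode='w', delete=False) as tmp:
            with open(path) as src:
                for line in src:
                    tmp.write(transform(line))
        shutil.move(tmp.name, path)  # the closed temp file replaces the original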
""" data = SeismicDataset(TraceIndex(name='raw', path=path_raw)) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 7701ee387..80aa1dd52 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -474,7 +474,10 @@ def _dump_picking(self, src, path, traces='raw', to_samples=True, columns=None): if to_samples: data = self.meta[traces]['samples'][data] - if columns is None: + if columns is not None: + if PICKS_FILE_HEADERS[-1] not in columns: + raise ValueError('Columns must contain', PICKS_FILE_HEADERS[-1]) + else: columns = PICKS_FILE_HEADERS df = self.index.get_df(reset=False) @@ -484,9 +487,6 @@ def _dump_picking(self, src, path, traces='raw', to_samples=True, columns=None): df = df.loc[self.indices] - if isinstance(self.index, KNNIndex): - df = df.iloc[::5, :] - df[PICKS_FILE_HEADERS[-1]] = data.astype(int) df = df.reset_index(drop=self.index.name is None)[columns] df.columns = df.columns.droplevel(1) @@ -1262,7 +1262,7 @@ def equalize(self, index, src, dst, params, survey_id_col=None): @action @inbatch_parallel(init='_init_component', target="threads") - def shift_pick(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, thd=0.05): + def shift_pick_phase(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, threshold=0.05): """ Shifts picking time stored in `src` component on the given phase along the traces stored in `src_raw`. Parameters @@ -1276,7 +1276,7 @@ def shift_pick(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, thd=0 shift: float The amount of phase to shift, default is 1.5 * np.pi which corresponds to transfering picking times from 'max' to 'zero' type. - thd: float + threshold: float Threshold determining how many trace samples with low amplitudes, less then thd, can be skipped. Introduced because of the unstable behaviour of the hilbert transform at the begining of the signal. 
@@ -1293,9 +1293,10 @@ def shift_pick_phase(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi,
         # finding x such that phase[x] = phase[pick] - shift
         phase_mod = phase - phase[pick] + shift
         phase_mod[phase_mod < 0] = 0
-        zero = len(phase_mod) - phase_mod[::-1].argmin() - 1 # in case phase_mod reaches 0 multiple times find the index of last one
-        # skip the trace samples with amplitudes < thd, starting from the `zero` sample
-        n_skip = max((np.abs(trace[zero:]) > thd).argmax() - 1, 0)
-        zero += n_skip
-        getattr(self, dst)[pos] = zero
+        # in case phase_mod reaches 0 multiple times find the index of last one
+        x = len(phase_mod) - phase_mod[::-1].argmin() - 1
+        # skip the trace samples with amplitudes < threshold, starting from the `zero` sample
+        n_skip = max((np.abs(trace[x:]) > threshold).argmax() - 1, 0)
+        x += n_skip
+        getattr(self, dst)[pos] = x
         return self

From 693eec9ce75f015e4ba3c9a7aa4bf11e92e17cb4 Mon Sep 17 00:00:00 2001
From: user
Date: Thu, 28 Nov 2019 12:41:40 +0300
Subject: [PATCH 28/45] add picking to meta

---
 seismicpro/src/seismic_batch.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py
index 80aa1dd52..a7b51a7f0 100644
--- a/seismicpro/src/seismic_batch.py
+++ b/seismicpro/src/seismic_batch.py
@@ -530,6 +530,7 @@ def _load_picking(self, components):
         res = np.split(idf.FIRST_BREAK_TIME.values, np.cumsum(self.index.tracecounts))[:-1]
         self.add_components(components, init=res)
+        self.meta.update({components: dict(sorting=None)})
         return self

     @apply_to_each_component

From 3ced378dfda35a8185ed03415aab407d91ae7613 Mon Sep 17 00:00:00 2001
From: user
Date: Thu, 28 Nov 2019 13:01:38 +0300
Subject: [PATCH 29/45] replace np.arange with slice

---
 docker_containers/picking_docker/picking_inference.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker_containers/picking_docker/picking_inference.py b/docker_containers/picking_docker/picking_inference.py
index 302cbbdcf..0e16cb742 100644
--- a/docker_containers/picking_docker/picking_inference.py
+++ b/docker_containers/picking_docker/picking_inference.py
@@ -82,7 +82,7 @@ def predict(path_raw, path_model, num_zero=100, save_to='dump.csv',
     test_tmpl = (data.p
                  .init_model('dynamic', UNet, 'my_model', config=config_predict)
-                 .load(components='raw', fmt='segy', tslice=np.arange(trace_len))
+                 .load(components='raw', fmt='segy', tslice=slice(0, trace_len))
                  .drop_zero_traces(num_zero=num_zero, src='raw')
                  .standardize(src='raw', dst='raw')
                  .add_components(components='predictions')

From 14432be96d81904414eacb74658300993e477b84 Mon Sep 17 00:00:00 2001
From: user
Date: Thu, 28 Nov 2019 14:26:28 +0300
Subject: [PATCH 30/45] change default behaviour for KNN constructor

---
 seismicpro/src/seismic_index.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/seismicpro/src/seismic_index.py b/seismicpro/src/seismic_index.py
index 8efc8dc5c..83f4ffdd9 100644
--- a/seismicpro/src/seismic_index.py
+++ b/seismicpro/src/seismic_index.py
@@ -286,7 +286,7 @@ class KNNIndex(TraceIndex):
     n_neighbors : int
         Group size parameter.
     raise_error: bool
-        Whether to raise an error when 2 receivers with the same coordinates are found on the same shot, default False
+        Whether to raise an error when 2 receivers with the same coordinates are found on the same shot, default True
     kwargs : dict
         Named arguments for ```batchflow.FilesIndex````.

@@ -301,7 +301,7 @@ class KNNIndex(TraceIndex):
     traces. Columns include FieldRecord, TraceNumber, TRACE_SEQUENCE_FILE,
     file_id and a number of extra_headers if specified.
     """
-    def __init__(self, *args, raise_error=False, **kwargs):
+    def __init__(self, *args, raise_error=True, **kwargs):
         self.raise_error = raise_error
         kwargs['index_name'] = 'KNN'
         super().__init__(*args, **kwargs)
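With the default flipped, callers that previously relied on silently tolerating duplicated coordinates now have to opt out explicitly. Roughly, under the package layout shown above (the path and keyword values here are placeholders):

    from seismicpro.src import KNNIndex

    # raise_error=True (the new default) fails fast on duplicated CDP coordinates;
    # pass raise_error=False to restore the old permissive behaviour
    index = KNNIndex(n_neighbors=5, raise_error=False, name='raw', path='/data/field.sgy')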
From 7c3a227b113ebc433d8382b175dfc9e3b2cb4d96 Mon Sep 17 00:00:00 2001
From: user
Date: Thu, 28 Nov 2019 14:28:18 +0300
Subject: [PATCH 31/45] remove implicit default values for actions arg
 src_traces

---
 seismicpro/src/seismic_batch.py | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py
index a7b51a7f0..2b8892176 100644
--- a/seismicpro/src/seismic_batch.py
+++ b/seismicpro/src/seismic_batch.py
@@ -449,7 +449,7 @@ def _dump_single_segy(self, src, path):
         return self

     @action
-    def _dump_picking(self, src, path, traces='raw', to_samples=True, columns=None):
+    def _dump_picking(self, src, path, src_traces, to_milliseconds=True, columns=None):
         """Dump picking to file.

         Parameters
@@ -458,10 +458,10 @@ def _dump_picking(self, src, path, src_traces, to_milliseconds=True, columns=Non
             Source to get picking from.
         path : str
             Output file path.
-        traces : str, default 'raw'
+        src_traces : str
             Batch component with corresponding traces.
-        to_samples : bool, default True
-            Should be picks converted to time samples.
+        to_milliseconds : bool, default True
+            Whether picks should be converted from trace samples to milliseconds.
         columns: array_like, optional
             Columns to include in the output file. See PICKS_FILE_HEADERS
             for default format.
@@ -471,8 +471,8 @@ def _dump_picking(self, src, path, src_traces, to_milliseconds=True, columns=Non
             Batch unchanged.
         """
         data = getattr(self, src).astype(int)
-        if to_samples:
-            data = self.meta[traces]['samples'][data]
+        if to_milliseconds:
+            data = self.meta[src_traces]['samples'][data]

         if columns is not None:
             if PICKS_FILE_HEADERS[-1] not in columns:
                 raise ValueError('Columns must contain {}'.format(PICKS_FILE_HEADERS[-1]))
         else:
             columns = PICKS_FILE_HEADERS

         df = self.index.get_df(reset=False)
-        sort_by = self.meta[traces]['sorting']
+        sort_by = self.meta[src_traces]['sorting']
         if sort_by is not None:
             df = df.sort_values(by=sort_by)

         df = df.loc[self.indices]
@@ -1066,7 +1066,7 @@ def standardize(self, src, dst):
         return self

     @action
-    def picking_to_mask(self, src, dst, src_traces='raw'):
+    def picking_to_mask(self, src, dst, src_traces):
         """Convert picking time to the mask for TraceIndex.

         Parameters
@@ -1087,8 +1086,8 @@ def picking_to_mask(self, src, dst, src_traces):
         data = np.concatenate(getattr(self, src))

         samples = self.meta[src_traces]['samples']
-        tick = samples[1] - samples[0]
-        data = np.around(data / tick).astype('int')
+        rate = samples[1] - samples[0]
+        data = np.around(data / rate).astype('int')

         batch_size = data.shape[0]
         trace_length = getattr(self, src_traces)[0].shape[1]
@@ -1263,7 +1262,7 @@ def equalize(self, index, src, dst, params, survey_id_col=None):
     @action
     @inbatch_parallel(init='_init_component', target="threads")
-    def shift_pick_phase(self, index, src, dst=None, src_raw='raw', shift=1.5*np.pi, threshold=0.05):
+    def shift_pick_phase(self, index, src, src_traces, dst=None, shift=1.5*np.pi, threshold=0.05):
         """ Shifts picking time stored in `src` component by the given phase along the traces stored in `src_raw`.

         Parameters
@@ -1272,8 +1271,8 @@ def shift_pick_phase(self, index, src, src_traces, dst=None, shift=1.5*np.pi, th
             The batch components to get the data from.
         dst : str
             The batch components to put the result in.
-        src_raw: str
-            The batch components where the traces are stored, default 'raw'
+        src_traces: str
+            The batch components where the traces are stored.
         shift: float
             The amount of phase to shift, default is 1.5 * np.pi which corresponds to transferring picking times
             from 'max' to 'zero' type.
@@ -1284,7 +1283,7 @@ def shift_pick_phase(self, index, src, src_traces, dst=None, shift=1.5*np.pi, th
         """
         pos = self.get_pos(None, src, index)
         pick = getattr(self, src)[pos]
-        trace = getattr(self, src_raw)[pos]
+        trace = getattr(self, src_traces)[pos]
         if isinstance(self.index, KNNIndex):
             trace = trace[0]
         trace = np.squeeze(trace)
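The sample/millisecond bookkeeping that `to_milliseconds` and `picking_to_mask` rely on reduces to dividing by the sampling rate. An illustrative sketch with assumed values; the trailing mask construction is one plausible layout, not the library's exact one:

    import numpy as np

    samples = np.arange(0, 1000, 2)           # sample times in ms, i.e. a 2 ms rate
    picks_ms = np.array([10.0, 13.9, 0.0])    # picking times in milliseconds
    rate = samples[1] - samples[0]
    pick_samples = np.around(picks_ms / rate).astype(int)
    print(pick_samples)                       # [5 7 0]

    # one plausible binary mask: ones from the first break onwards
    mask = np.zeros((len(picks_ms), len(samples)), dtype=int)
    for i, p in enumerate(pick_samples):
        mask[i, p:] = 1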
From 00ca348adc3016c79a6d48e53512aadc85b6b7f1 Mon Sep 17 00:00:00 2001
From: user
Date: Thu, 28 Nov 2019 14:49:24 +0300
Subject: [PATCH 32/45] update action call signatures

---
 docker_containers/picking_docker/picking_inference.py     | 5 ++---
 models/First_break_picking/1d_CNN/model_description.ipynb | 8 ++++----
 models/First_break_picking/1d_CNN/research.ipynb          | 6 +++---
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/docker_containers/picking_docker/picking_inference.py b/docker_containers/picking_docker/picking_inference.py
index 0e16cb742..97af5808c 100644
--- a/docker_containers/picking_docker/picking_inference.py
+++ b/docker_containers/picking_docker/picking_inference.py
@@ -92,10 +92,9 @@ def predict(path_raw, path_model, num_zero=100, save_to='dump.csv',
                  .mask_to_pick(src='predictions', dst='predictions', labels=False)
                  )
     if shift:
-        test_tmpl += Pipeline().shift_pick(src='predictions', dst='predictions', shift=np.pi*shift)
+        test_tmpl += Pipeline().shift_pick(src='predictions', dst='predictions', src_traces='raw', shift=np.pi*shift)

-    test_pipeline = test_tmpl + Pipeline().dump(src='predictions', fmt='picks', path=save_to,
-                                                traces='raw', to_samples=True)
+    test_pipeline = test_tmpl + Pipeline().dump(src='predictions', fmt='picks', path=save_to, src_traces='raw')
     test_pipeline.run(batch_size, n_epochs=1, drop_last=False, shuffle=False, bar=True)

 if __name__ == "__main__":
diff --git a/models/First_break_picking/1d_CNN/model_description.ipynb b/models/First_break_picking/1d_CNN/model_description.ipynb
index 36b53e73a..4ac697f1b 100644
--- a/models/First_break_picking/1d_CNN/model_description.ipynb
+++ b/models/First_break_picking/1d_CNN/model_description.ipynb
@@ -461,7 +461,7 @@
 "    .load(components='raw', fmt='segy')\n",
 "    .load(components='markup', fmt='picks')\n",
 "    .standardize(src='raw',
dst='raw')\n", - " .picking_to_mask(src='markup', dst='mask')\n", + " .picking_to_mask(src='markup', dst='mask', src_traces='raw')\n", " .init_model('dynamic', UNet, 'my_model', config)\n", " .init_variable('loss', init_on_each_run=list)\n", " .apply_transform_all(src='raw', dst='raw', func=lambda x: np.stack(x))\n", @@ -597,7 +597,7 @@ " .load(components='raw', fmt='segy')\n", " .load(components='markup', fmt='picks')\n", " .standardize(src='raw', dst='raw')\n", - " .picking_to_mask(src='markup', dst='mask')\n", + " .picking_to_mask(src='markup', dst='mask', src_traces='raw')\n", " .apply_transform_all(src='raw', dst='raw', func=lambda x: np.stack(x))\n", " .update_variable('traces', B('raw'), mode='a')\n", " .apply_transform_all(src='mask', dst='mask', func=lambda x: np.stack(x))\n", @@ -607,7 +607,7 @@ " save_to=B('predictions', mode='a'))\n", " .mask_to_pick(src='predictions', dst='predictions', labels=False)\n", " .update_variable('predictions', B('predictions'), mode='a')\n", - " .dump(src='predictions', fmt='picks',path='model_predictions.csv', traces='raw', to_samples=True)\n", + " .dump(src='predictions', fmt='picks',path='model_predictions.csv', src_traces='raw')\n", " .run_later(1000, n_epochs=1, drop_last=False, shuffle=False, bar=True))" ] }, @@ -1306,5 +1306,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/models/First_break_picking/1d_CNN/research.ipynb b/models/First_break_picking/1d_CNN/research.ipynb index e0d1c316d..2353f2b58 100644 --- a/models/First_break_picking/1d_CNN/research.ipynb +++ b/models/First_break_picking/1d_CNN/research.ipynb @@ -137,7 +137,7 @@ " .load(components='raw', fmt='segy')\n", " .load(components='markup', fmt='picks')\n", " .standardize(src='raw', dst='raw')\n", - " .picking_to_mask(src='markup', dst='mask')\n", + " .picking_to_mask(src='markup', dst='mask', src_traces='raw')\n", " .init_model('dynamic', UNet, 'my_model', config)\n", " .init_variable('loss', init_on_each_run=list)\n", " .apply_transform_all(src='raw', dst='raw', func=lambda x: np.stack(x))\n", @@ -153,7 +153,7 @@ " .load(components='raw', fmt='segy')\n", " .load(components='markup', fmt='picks')\n", " .standardize(src='raw', dst='raw')\n", - " .picking_to_mask(src='markup', dst='mask')\n", + " .picking_to_mask(src='markup', dst='mask', src_traces='raw')\n", " .update_variable('true', B('mask'), mode='a')\n", " .add_components(components='predictions')\n", " .apply_transform_all(src='raw', dst='raw', func=lambda x: np.stack(x))\n", @@ -412,5 +412,5 @@ } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } From 64430597118d875a5830c86944f0572f80c41dcc Mon Sep 17 00:00:00 2001 From: user Date: Thu, 28 Nov 2019 14:56:54 +0300 Subject: [PATCH 33/45] rename action call --- docker_containers/picking_docker/picking_inference.py | 2 +- seismicpro/src/seismic_batch.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker_containers/picking_docker/picking_inference.py b/docker_containers/picking_docker/picking_inference.py index 97af5808c..189ececc6 100644 --- a/docker_containers/picking_docker/picking_inference.py +++ b/docker_containers/picking_docker/picking_inference.py @@ -92,7 +92,7 @@ def predict(path_raw, path_model, num_zero=100, save_to='dump.csv', .mask_to_pick(src='predictions', dst='predictions', labels=False) ) if shift: - test_tmpl += Pipeline().shift_pick(src='predictions', dst='predictions', src_traces='raw', shift=np.pi*shift) + test_tmpl += Pipeline().shift_pick_phase(src='predictions', dst='predictions', 
src_traces='raw', shift=np.pi*shift) test_pipeline = test_tmpl + Pipeline().dump(src='predictions', fmt='picks', path=save_to, src_traces='raw') test_pipeline.run(batch_size, n_epochs=1, drop_last=False, shuffle=False, bar=True) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 2b8892176..253588fda 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -481,7 +481,7 @@ def _dump_picking(self, src, path, src_traces, to_miliseconds=True, columns=None columns = PICKS_FILE_HEADERS df = self.index.get_df(reset=False) - sort_by = self.meta[src_traces]['sorting'] + sort_by = self.meta[src]['sorting'] if sort_by is not None: df = df.sort_values(by=sort_by) From f64924d29bbc8ab22358a84b94f884b9d1da427a Mon Sep 17 00:00:00 2001 From: user Date: Thu, 28 Nov 2019 15:53:18 +0300 Subject: [PATCH 34/45] modify PICKS_FILE_HEADERS to single column --- seismicpro/src/seismic_batch.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 253588fda..fefbb630a 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -18,7 +18,7 @@ from .plot_utils import IndexTracker, spectrum_plot, seismic_plot, statistics_plot, gain_plot -PICKS_FILE_HEADERS = ['FieldRecord', 'TraceNumber', 'FIRST_BREAK_TIME'] +PICKS_FILE_HEADER = 'FIRST_BREAK_TIME' ACTIONS_DICT = { @@ -449,7 +449,7 @@ def _dump_single_segy(self, src, path): return self @action - def _dump_picking(self, src, path, src_traces, to_miliseconds=True, columns=None): + def _dump_picking(self, src, path, src_traces, to_miliseconds=True, columns=('FieldRecord', 'TraceNumber')): """Dump picking to file. Parameters @@ -462,8 +462,9 @@ def _dump_picking(self, src, path, src_traces, to_miliseconds=True, columns=None Batch component with corresponding traces. to_miliseconds : bool, default True Whether picks should be converted from trace samples to miliseconds. - columns: array_like, optional - Columns to include in the output file. See PICKS_FILE_HEADERS for default format. + columns: array_like + Columns to include in the output file. + In case `PICKS_FILE_HEADER` not included it will be added automatically. 
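# --- Editor's sketch, not part of the patch: how the default `columns` tuple picks up
# PICKS_FILE_HEADER before dumping. Names mirror the action's arguments; values are toy.
PICKS_FILE_HEADER = 'FIRST_BREAK_TIME'
columns = ('FieldRecord', 'TraceNumber')
if PICKS_FILE_HEADER not in columns:
    columns = columns + (PICKS_FILE_HEADER, )
# columns is now ('FieldRecord', 'TraceNumber', 'FIRST_BREAK_TIME')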
Returns ------- @@ -474,11 +475,8 @@ def _dump_picking(self, src, path, src_traces, to_miliseconds=True, columns=None if to_miliseconds: data = self.meta[src_traces]['samples'][data] - if columns is not None: - if PICKS_FILE_HEADERS[-1] not in columns: - raise ValueError('Columns must contain', PICKS_FILE_HEADERS[-1]) - else: - columns = PICKS_FILE_HEADERS + if PICKS_FILE_HEADER not in columns: + columns = columns + (PICKS_FILE_HEADER, ) df = self.index.get_df(reset=False) sort_by = self.meta[src]['sorting'] @@ -486,7 +484,7 @@ def _dump_picking(self, src, path, src_traces, to_miliseconds=True, columns=None df = df.sort_values(by=sort_by) df = df.loc[self.indices] - df[PICKS_FILE_HEADERS[-1]] = data.astype(int) + df[PICKS_FILE_HEADER] = data.astype(int) df = df.reset_index(drop=self.index.name is None)[columns] df.columns = df.columns.droplevel(1) From cd0907adf81ab1c8bce680ca65b0cd42fbc44cb2 Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Sun, 1 Dec 2019 13:35:39 +0300 Subject: [PATCH 35/45] Set sorting outside parallel action --- seismicpro/src/seismic_batch.py | 65 +++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 20 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 2ce7bda45..9bb1e1904 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -644,10 +644,9 @@ def pad_traces(self, index, *args, src, dst=None, **kwargs): getattr(self, dst)[pos] = np.pad(data, **kwargs) return self - @action @inbatch_parallel(init="_init_component", target="threads") @apply_to_each_component - def sort_traces(self, index, *args, src, sort_by, dst=None): + def _sort(self, index, src, sort_by, sorting, dst=None): """Sort traces. Parameters @@ -656,27 +655,20 @@ def sort_traces(self, index, *args, src, sort_by, dst=None): The batch components to get the data from. dst : str, array-like The batch components to put the result in. - sort_by: str + sort_by : str Sorting key. + sorting : str + Current sorting of `src` component Returns ------- batch : SeismicBatch Batch with new trace sorting. """ - _ = args - if src in self.meta.keys(): - sorting = self.meta[src].get('sorting') - else: - sorting = None - pos = self.get_pos(None, src, index) df = self.index.get_df([index]) if sorting: - if sorting == sort_by: - return self - cols = [sorting, sort_by] sorted_index_df = df[cols].sort_values(sorting) order = np.argsort(sorted_index_df[sort_by].values) @@ -684,15 +676,43 @@ def sort_traces(self, index, *args, src, sort_by, dst=None): order = np.argsort(df[sort_by].values) getattr(self, dst)[pos] = getattr(self, src)[pos][order] + return self + + @action + def sort_traces(self, *args, src, sort_by, dst=None): + """Sort traces. + + Parameters + ---------- + src : str, array-like + The batch components to get the data from. + dst : str, array-like + The batch components to put the result in. + sort_by : str + Sorting key. + + Returns + ------- + batch : SeismicBatch + Batch with new trace sorting. 
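# --- Editor's sketch with toy data, not part of the patch: the permutation trick `_sort`
# relies on. Traces are assumed to be stored in `current_sorting` order; sorting the index
# dataframe by that key aligns its rows with the traces, and argsort of the new key then
# gives the reordering to apply to the traces themselves.
import numpy as np
import pandas as pd

df = pd.DataFrame({'offset': [300, 100, 200], 'TraceNumber': [2, 3, 1]})  # file order
traces_by_offset = np.array(['t100', 't200', 't300'])  # already sorted by offset
aligned = df[['offset', 'TraceNumber']].sort_values('offset')
order = np.argsort(aligned['TraceNumber'].values)  # -> [1, 2, 0]
traces_by_tracenumber = traces_by_offset[order]
# -> ['t200', 't300', 't100'], i.e. the traces with TraceNumber 1, 2, 3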
+ """ + _ = args + if src in self.meta.keys(): + sorting = self.meta[src].get('sorting') + else: + sorting = None + + if sorting == sort_by: + return self - if pos == 0: - self.meta[dst]['sorting'] = sort_by + self._sort(src=src, sort_by=sort_by, sorting=sorting, dst=dst) + self.meta[dst]['sorting'] = sort_by return self @action @inbatch_parallel(init="indices", post='_post_filter_by_mask', target="threads") - def drop_zero_traces(self, index, src, num_zero, **kwargs): + def drop_zero_traces(self, index, src, num_zero, all_comps_sorted=True, **kwargs): """Drop traces with sequence of zeros longer than ```num_zero```. This action drops traces from index dataframe and from all batch components @@ -704,6 +724,9 @@ def drop_zero_traces(self, index, src, num_zero, **kwargs): Size of the sequence of zeros. src : str, array-like The batch components to get the data from. + all_comps_sorted : bool + Check that all components have the same sorting to ensure that they are + modified in a same way. Returns ------- @@ -712,8 +735,9 @@ def drop_zero_traces(self, index, src, num_zero, **kwargs): Raises ------ - ValueError : if `src` has no sorting and batch index is FieldIndex - ValueError : if any component in batch has sorting different from `src` + ValueError : if `src` has no sorting and batch index is FieldIndex. + ValueError : if `all_comps_sorted` is True and any component in batch has + sorting different from `src`. Note ---- @@ -726,9 +750,10 @@ def drop_zero_traces(self, index, src, num_zero, **kwargs): raise ValueError('traces in `{}` component should be sorted ' 'before dropping zero traces'.format(src)) - has_same_sorting = all([self.meta[comp]['sorting'] == sorting for comp in self.components]) - if not has_same_sorting: - raise ValueError('all components in batch should have same sorting') + if all_comps_sorted: + has_same_sorting = all([self.meta[comp]['sorting'] == sorting for comp in self.components]) + if not has_same_sorting: + raise ValueError('all components in batch should have same sorting') pos = self.get_pos(None, src, index) traces = getattr(self, src)[pos] From ae3bdda26a1df6b8e649ce514077e203eadaa9f0 Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Sun, 1 Dec 2019 17:26:52 +0100 Subject: [PATCH 36/45] `src` should be sorted in all cases except if TraceIndex Co-Authored-By: nikitaK --- seismicpro/src/seismic_batch.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 9bb1e1904..9b639dadf 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -746,7 +746,7 @@ def drop_zero_traces(self, index, src, num_zero, all_comps_sorted=True, **kwargs """ _ = kwargs sorting = self.meta[src]['sorting'] - if sorting is None and isinstance(self.index, FieldIndex): + if sorting is None and not isinstance(self.index, TraceIndex): raise ValueError('traces in `{}` component should be sorted ' 'before dropping zero traces'.format(src)) From ddd500c8d1a48b1342d9760620f3a65420344d98 Mon Sep 17 00:00:00 2001 From: user Date: Mon, 2 Dec 2019 17:22:40 +0300 Subject: [PATCH 37/45] modify load picking action, picking loaded as np.array --- seismicpro/src/seismic_batch.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index fefbb630a..c8c787ed0 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -522,11 +522,11 @@ def load(self, src=None, fmt=None, 
components=None, **kwargs): return super().load(src=src, fmt=fmt, components=components, **kwargs) def _load_picking(self, components): - """Load picking from file.""" + """Load picking from dataframe column.""" idf = self.index.get_df(reset=False) - res = np.split(idf.FIRST_BREAK_TIME.values, - np.cumsum(self.index.tracecounts))[:-1] - self.add_components(components, init=res) + ind = np.cumsum(self.index.tracecounts)[:-1] + dst_data = np.split(idf[PICKS_FILE_HEADER].values, ind) + self.add_components(components, init=np.array(dst_data + [None])[:-1]) self.meta.update({components:dict(sorting=None)}) return self From b72454fd58b7738e3395cbb86143e53691499cb5 Mon Sep 17 00:00:00 2001 From: user Date: Mon, 2 Dec 2019 21:10:02 +0300 Subject: [PATCH 38/45] update transform func docs --- seismicpro/src/utils.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/seismicpro/src/utils.py b/seismicpro/src/utils.py index f16f46b46..e10012cde 100644 --- a/seismicpro/src/utils.py +++ b/seismicpro/src/utils.py @@ -809,10 +809,12 @@ def check_unique_fieldrecord_across_surveys(surveys_by_fieldrecord, index): def transform_to_fixed_width_columns(path, path_save=None, n_spaces=8, max_len=(6, 4)): - """ Transforms the format of csv file with dumped picking so all the columns are separated by `n_spaces` spaces. - Such transform makes it compatible with specific seismic processing software. - Most of the time columns 'FieldRecord' and 'TraceNumber' contains of 6 and 4 digits respectively, - however, it may vary. + """ Transforms the format of the csv file with dumped picking so all the columns are separated + by `n_spaces` spaces exactly. To make such transform possible you must provide the maximum number + of digits each column, except the last one, contains. In case, for example, traces are identified + by the 'FieldRecord' and 'TraceNumber' headers, and they varies from 1 to 123456 and from 1 to 1234 respectively, + `max_len` is `(6, 4)`. Such transform makes it compatible with specific seismic processing software. + Parameters ---------- @@ -823,7 +825,7 @@ def transform_to_fixed_width_columns(path, path_save=None, n_spaces=8, max_len=( n_spaces : int, default is 8 The number of spaces separating columns. max_len : tuple, default is (6, 4) - The number of maximum digits each column except last contains + The maximum number of digits each column, except the last one, contains. """ if path_save is not None: write_object = open(path_save, 'w') From f42169c599a14dc91e5e99030f62937b8945522e Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Mon, 2 Dec 2019 22:51:02 +0300 Subject: [PATCH 39/45] Fix PR comments --- seismicpro/src/seismic_batch.py | 51 +++++++++++++++++---------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 9bb1e1904..79f2c345d 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -171,7 +171,8 @@ def _init_component(self, *args, dst, **kwargs): return self.indices def _post_filter_by_mask(self, mask, *args, **kwargs): - """Component filtration using the union of all the received masks. + """Index filtration using all received masks. This post function assumes that + components have already been sorted. Parameters ---------- @@ -181,7 +182,7 @@ def _post_filter_by_mask(self, mask, *args, **kwargs): Returns ------- : SeismicBatch - New batch with filtered components and new index. + New batch with new index. 
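# --- Editor's sketch with toy counts, not part of the patch: how patch 37's `_load_picking`
# slices one flat picking column into per-item arrays with np.split over cumulative trace
# counts. Appending [None] before np.array is the patch's trick to force a 1-D object array
# even when all pieces happen to have the same length.
import numpy as np

picks = np.array([10, 11, 12, 20, 21])  # flat FIRST_BREAK_TIME column
tracecounts = np.array([3, 2])          # traces per index element, in index order
parts = np.split(picks, np.cumsum(tracecounts)[:-1])
# parts == [array([10, 11, 12]), array([20, 21])]
components = np.array(parts + [None])[:-1]  # dtype=object array of length 2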
Note ---- @@ -202,19 +203,19 @@ def _post_filter_by_mask(self, mask, *args, **kwargs): batch_index = type(self.index).from_index(index=new_index, idf=new_idf, index_name=self.index.name) - batch = type(self)(batch_index) - batch.add_components(self.components) - batch.meta = self.meta + new_batch = type(self)(batch_index) + new_batch.add_components(self.components) + new_batch.meta = self.meta - for comp in batch.components: - setattr(batch, comp, np.array([None] * len(batch.index))) + for comp in new_batch.components: + setattr(new_batch, comp, np.array([None] * len(new_batch.index))) for index in new_index: - for isrc in batch.components: - pos_batch = batch.get_pos(None, isrc, index) - pos_self = self.get_pos(None, isrc, index) - getattr(batch, isrc)[pos_batch] = getattr(self, isrc)[pos_self] - return batch + for isrc in new_batch.components: + pos_new = new_batch.get_pos(None, isrc, index) + pos_old = self.get_pos(None, isrc, index) + getattr(new_batch, isrc)[pos_new] = getattr(self, isrc)[pos_old] + return new_batch def trace_headers(self, header, flatten=False): """Get trace heades. @@ -646,7 +647,7 @@ def pad_traces(self, index, *args, src, dst=None, **kwargs): @inbatch_parallel(init="_init_component", target="threads") @apply_to_each_component - def _sort(self, index, src, sort_by, sorting, dst=None): + def _sort(self, index, src, sort_by, current_sorting, dst=None): """Sort traces. Parameters @@ -657,7 +658,7 @@ def _sort(self, index, src, sort_by, sorting, dst=None): The batch components to put the result in. sort_by : str Sorting key. - sorting : str + current_sorting : str Current sorting of `src` component Returns @@ -668,9 +669,9 @@ def _sort(self, index, src, sort_by, sorting, dst=None): pos = self.get_pos(None, src, index) df = self.index.get_df([index]) - if sorting: - cols = [sorting, sort_by] - sorted_index_df = df[cols].sort_values(sorting) + if current_sorting: + cols = [current_sorting, sort_by] + sorted_index_df = df[cols].sort_values(current_sorting) order = np.argsort(sorted_index_df[sort_by].values) else: order = np.argsort(df[sort_by].values) @@ -679,7 +680,7 @@ def _sort(self, index, src, sort_by, sorting, dst=None): return self @action - def sort_traces(self, *args, src, sort_by, dst=None): + def sort_traces(self, *args, src, sort_by, dst): """Sort traces. Parameters @@ -698,14 +699,14 @@ def sort_traces(self, *args, src, sort_by, dst=None): """ _ = args if src in self.meta.keys(): - sorting = self.meta[src].get('sorting') + current_sorting = self.meta[src].get('sorting') else: - sorting = None + current_sorting = None - if sorting == sort_by: + if current_sorting == sort_by: return self - self._sort(src=src, sort_by=sort_by, sorting=sorting, dst=dst) + self._sort(src=src, sort_by=sort_by, current_sorting=current_sorting, dst=dst) self.meta[dst]['sorting'] = sort_by return self @@ -721,7 +722,7 @@ def drop_zero_traces(self, index, src, num_zero, all_comps_sorted=True, **kwargs Parameters ---------- num_zero : int - Size of the sequence of zeros. + All traces that contain more than `num_zero` consecutive zeros will be removed. src : str, array-like The batch components to get the data from. 
all_comps_sorted : bool @@ -751,7 +752,7 @@ def drop_zero_traces(self, index, src, num_zero, all_comps_sorted=True, **kwargs 'before dropping zero traces'.format(src)) if all_comps_sorted: - has_same_sorting = all([self.meta[comp]['sorting'] == sorting for comp in self.components]) + has_same_sorting = all(self.meta[comp]['sorting'] == sorting for comp in self.components) if not has_same_sorting: raise ValueError('all components in batch should have same sorting') @@ -759,7 +760,7 @@ def drop_zero_traces(self, index, src, num_zero, all_comps_sorted=True, **kwargs traces = getattr(self, src)[pos] mask = list() for trace in traces: - nonzero_indices = np.nonzero(trace)[0] + nonzero_indices = np.flatnonzero(trace) # add -1 and len(trace) indices to count leading and trailing zero sequences nonzero_indices = np.concatenate(([-1], nonzero_indices, [len(trace)])) zero_seqs = np.diff(nonzero_indices) - 1 From 0bd72daa01737e715cd530e93a80bb51fd2b8446 Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Mon, 2 Dec 2019 22:57:30 +0300 Subject: [PATCH 40/45] Fix docstrings --- seismicpro/src/seismic_batch.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 7fd3337c0..f49e7aebf 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -172,7 +172,7 @@ def _init_component(self, *args, dst, **kwargs): def _post_filter_by_mask(self, mask, *args, **kwargs): """Index filtration using all received masks. This post function assumes that - components have already been sorted. + components have already been filtered. Parameters ---------- @@ -188,7 +188,8 @@ def _post_filter_by_mask(self, mask, *args, **kwargs): ---- Batch items in each component should be filtered in decorated action. This post function creates new instance of SeismicBatch with new index - instance. + instance and copies filtered components from original batch for elements + in new index. """ _ = args, kwargs if any_action_failed(mask): From 3f4a59194487614a90f29eba2e759b6f437188eb Mon Sep 17 00:00:00 2001 From: user Date: Tue, 3 Dec 2019 11:41:20 +0300 Subject: [PATCH 41/45] PR comments --- .../picking_docker/picking_inference.py | 6 +++--- seismicpro/src/seismic_batch.py | 21 ++++++++++--------- seismicpro/src/utils.py | 4 ++-- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/docker_containers/picking_docker/picking_inference.py b/docker_containers/picking_docker/picking_inference.py index 189ececc6..0ed2140c0 100644 --- a/docker_containers/picking_docker/picking_inference.py +++ b/docker_containers/picking_docker/picking_inference.py @@ -63,8 +63,8 @@ def predict(path_raw, path_model, num_zero=100, save_to='dump.csv', The number of first samples in the trace to load to the pipeline. device: str or torch.device, default: 'cpu' The device used for inference. Can be 'gpu' in case of avaliavle GPU. - shift: int, default: 0 - Shift the picking times for each trace on the given phase shift. Multiplied by `pi`. + shift: float, default: 0 + Shift the picking times for each trace on the given phase shift, measured in radians. 
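# --- Editor's sketch with a toy trace, not part of the patch: the analytic-signal idea
# behind shift_pick_phase, shown with hypothetical values. The instantaneous phase is
# unwrapped, and the pick moves to the last sample whose phase lies `shift` radians below
# the phase at the current pick, e.g. turning a 'max'-type pick into the preceding zero
# crossing.
import numpy as np
from scipy.signal import hilbert

t = np.linspace(0, 1, 500, endpoint=False)
trace = np.sin(2 * np.pi * 10 * t)      # 10 Hz toy trace
pick = 200 + np.argmax(trace[200:260])  # a 'max' pick away from the edges
phase = np.unwrap(np.angle(hilbert(trace)))
phase_mod = phase - (phase[pick] - 1.5 * np.pi)
phase_mod[phase_mod < 0] = 0
new_pick = len(phase_mod) - phase_mod[::-1].argmin() - 1
# new_pick lands roughly three quarters of a period before the old pick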
""" data = SeismicDataset(TraceIndex(name='raw', path=path_raw)) @@ -92,7 +92,7 @@ def predict(path_raw, path_model, num_zero=100, save_to='dump.csv', .mask_to_pick(src='predictions', dst='predictions', labels=False) ) if shift: - test_tmpl += Pipeline().shift_pick_phase(src='predictions', dst='predictions', src_traces='raw', shift=np.pi*shift) + test_tmpl += Pipeline().shift_pick_phase(src='predictions', dst='predictions', src_traces='raw', shift=shift) test_pipeline = test_tmpl + Pipeline().dump(src='predictions', fmt='picks', path=save_to, src_traces='raw') test_pipeline.run(batch_size, n_epochs=1, drop_last=False, shuffle=False, bar=True) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index c8c787ed0..5c5c33f93 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -1260,25 +1260,26 @@ def equalize(self, index, src, dst, params, survey_id_col=None): @action @inbatch_parallel(init='_init_component', target="threads") - def shift_pick_phase(self, index, src, src_traces, dst=None, shift=1.5*np.pi, threshold=0.05): - """ Shifts picking time stored in `src` component on the given phase along the traces stored in `src_raw`. + def shift_pick_phase(self, index, src, src_traces, dst=None, shift=1.5, threshold=0.05): + """ Shifts picking time stored in `src` component on the given phase along the traces stored in `src_traces`. Parameters ---------- src : str - The batch components to get the data from. + The batch component to get picking from. dst : str - The batch components to put the result in. + The batch component to put the result in. src_traces: str - The batch components where the traces are stored. + The batch component where the traces are stored. shift: float - The amount of phase to shift, default is 1.5 * np.pi which corresponds to transfering picking times - from 'max' to 'zero' type. + The amount of phase to shift measured in radians. Default is 1.5 , which corresponds + to transfering the picking times from 'max' to 'zero' type. threshold: float - Threshold determining how many trace samples with low amplitudes, less then thd, can be skipped. - Introduced because of the unstable behaviour of the hilbert transform at the begining of the signal. + Threshold determining amplitude, such that all the samples with amplitude less then threshold would be + skipped. Introduced because of unstable behaviour of the hilbert transform at the begining of the signal. """ + shift *= np.pi pos = self.get_pos(None, src, index) pick = getattr(self, src)[pos] trace = getattr(self, src_traces)[pos] @@ -1289,7 +1290,7 @@ def shift_pick_phase(self, index, src, src_traces, dst=None, shift=1.5*np.pi, th analytic = hilbert(trace) phase = np.unwrap(np.angle(analytic)) # finding x such that phase[x] = phase[pick] - shift - phase_mod = phase - phase[pick] + shift + phase_mod = phase - (phase[pick] - shift) phase_mod[phase_mod < 0] = 0 # in case phase_mod reaches 0 multiple times find the index of last one x = len(phase_mod) - phase_mod[::-1].argmin() - 1 diff --git a/seismicpro/src/utils.py b/seismicpro/src/utils.py index e10012cde..8a6984654 100644 --- a/seismicpro/src/utils.py +++ b/seismicpro/src/utils.py @@ -812,7 +812,7 @@ def transform_to_fixed_width_columns(path, path_save=None, n_spaces=8, max_len=( """ Transforms the format of the csv file with dumped picking so all the columns are separated by `n_spaces` spaces exactly. 
To make such transform possible you must provide the maximum number of digits each column, except the last one, contains. In case, for example, traces are identified - by the 'FieldRecord' and 'TraceNumber' headers, and they varies from 1 to 123456 and from 1 to 1234 respectively, + by the 'FieldRecord' and 'TraceNumber' headers, and their maximum values are 999999 and 9999 respectively, `max_len` is `(6, 4)`. Such transform makes it compatible with specific seismic processing software. @@ -825,7 +825,7 @@ n_spaces : int, default is 8 The number of spaces separating columns. max_len : tuple, default is (6, 4) - The maximum number of digits each column, except the last one, contains. + Width of each column except the last one. """ if path_save is not None: write_object = open(path_save, 'w') From 634ed5387e138db01227c212d808efff08957545 Mon Sep 17 00:00:00 2001 From: user Date: Tue, 3 Dec 2019 12:00:45 +0300 Subject: [PATCH 42/45] modify dump action --- seismicpro/src/seismic_batch.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 5c5c33f93..18158dcc7 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -449,7 +449,7 @@ def _dump_single_segy(self, src, path): return self @action - def _dump_picking(self, src, path, src_traces, to_miliseconds=True, columns=('FieldRecord', 'TraceNumber')): + def _dump_picking(self, src, path, src_traces, input_units='samples', columns=('FieldRecord', 'TraceNumber')): """Dump picking to file. Parameters ---------- src : str, array-like Source to get picking from. path : str Output file path. src_traces : str Batch component with corresponding traces. - to_miliseconds : bool, default True - Whether picks should be converted from trace samples to miliseconds. + input_units : str + Defines in which units picking is stored in src. Must be one of the 'samples' or 'milliseconds'. columns: array_like Columns to include in the output file. In case `PICKS_FILE_HEADER` not included it will be added automatically.
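# --- Editor's sketch with toy values, not part of the patch: what input_units='samples'
# triggers in _dump_picking. Picks held as sample indices are mapped through the recording
# time axis stored in meta['samples']; the 2 ms sampling interval below is an assumption.
import numpy as np

samples = np.arange(751) * 2.0            # time of each sample, in ms
picks_in_samples = np.array([10, 42, 300])
picks_in_ms = samples[picks_in_samples]   # -> [ 20.,  84., 600.]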
""" - data = getattr(self, src).astype(int) - if to_miliseconds: + data = getattr(self, src) + if input_units == 'samples': + data = data.astype(int) data = self.meta[src_traces]['samples'][data] if PICKS_FILE_HEADER not in columns: columns = columns + (PICKS_FILE_HEADER, ) df = self.index.get_df(reset=False) - sort_by = self.meta[src]['sorting'] + sort_by = self.meta.get(src, {}).get('sorting') if sort_by is not None: df = df.sort_values(by=sort_by) df = df.loc[self.indices] - df[PICKS_FILE_HEADER] = data.astype(int) df = df.reset_index(drop=self.index.name is None)[columns] df.columns = df.columns.droplevel(1) From 3cea7f580efaf5d24034da059f5b56916e6f0981 Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Tue, 3 Dec 2019 12:11:11 +0300 Subject: [PATCH 43/45] Fix PR comments --- seismicpro/src/seismic_batch.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index f49e7aebf..82e8a85a4 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -9,7 +9,7 @@ from ..batchflow import action, inbatch_parallel, Batch, any_action_failed -from .seismic_index import SegyFilesIndex, FieldIndex +from .seismic_index import SegyFilesIndex, FieldIndex, TraceIndex from .utils import (FILE_DEPENDEND_COLUMNS, partialmethod, calculate_sdc_for_field, massive_block, check_unique_fieldrecord_across_surveys) @@ -205,17 +205,16 @@ def _post_filter_by_mask(self, mask, *args, **kwargs): index_name=self.index.name) new_batch = type(self)(batch_index) - new_batch.add_components(self.components) + new_batch.add_components(self.components, len(self.components) * [new_batch.array_of_nones]) new_batch.meta = self.meta - for comp in new_batch.components: - setattr(new_batch, comp, np.array([None] * len(new_batch.index))) - - for index in new_index: - for isrc in new_batch.components: - pos_new = new_batch.get_pos(None, isrc, index) - pos_old = self.get_pos(None, isrc, index) - getattr(new_batch, isrc)[pos_new] = getattr(self, isrc)[pos_old] + for isrc in new_batch.components: + pos_new = [] + pos_old = [] + for index in new_index: + pos_new.append(new_batch.get_pos(None, isrc, index)) + pos_old.append(self.get_pos(None, isrc, index)) + getattr(new_batch, isrc)[pos_new] = getattr(self, isrc)[pos_old] return new_batch def trace_headers(self, header, flatten=False): From da7f3f274ebee3416f9654da80d095ec7582d905 Mon Sep 17 00:00:00 2001 From: user Date: Tue, 3 Dec 2019 12:17:50 +0300 Subject: [PATCH 44/45] sample subset of df columns by list instead of tuple --- seismicpro/src/seismic_batch.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 18158dcc7..2fc0934b0 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -461,7 +461,8 @@ def _dump_picking(self, src, path, src_traces, input_units='samples', columns=(' src_traces : str Batch component with corresponding traces. input_units : str - Defines in which units picking is stored in src. Must be one of the 'samples' or 'milliseconds'. + Units in which picking is stored in src. Must be one of the 'samples' or 'milliseconds'. + In case 'milliseconds' dumped as is. Otherwise converted to milliseconds first. columns: array_like Columns to include in the output file. In case `PICKS_FILE_HEADER` not included it will be added automatically. 
@@ -485,7 +486,7 @@ def _dump_picking(self, src, path, src_traces, input_units='samples', columns=(' df = df.sort_values(by=sort_by) df = df.loc[self.indices] - df = df.reset_index(drop=self.index.name is None)[columns] + df = df.reset_index(drop=self.index.name is None)[list(columns)] df.columns = df.columns.droplevel(1) if not os.path.isfile(path): From a13365db9c05ad667275ce7e0cfc3e2cd122c66c Mon Sep 17 00:00:00 2001 From: Dmitry Podvyaznikov Date: Tue, 3 Dec 2019 15:12:28 +0300 Subject: [PATCH 45/45] Raise error if drop_zero_traces' src not in Index --- seismicpro/src/seismic_batch.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/seismicpro/src/seismic_batch.py b/seismicpro/src/seismic_batch.py index 82e8a85a4..90cb08b8d 100644 --- a/seismicpro/src/seismic_batch.py +++ b/seismicpro/src/seismic_batch.py @@ -209,11 +209,8 @@ def _post_filter_by_mask(self, mask, *args, **kwargs): new_batch.meta = self.meta for isrc in new_batch.components: - pos_new = [] - pos_old = [] - for index in new_index: - pos_new.append(new_batch.get_pos(None, isrc, index)) - pos_old.append(self.get_pos(None, isrc, index)) + pos_new = new_batch.get_pos(None, isrc, new_batch.indices) + pos_old = self.get_pos(None, isrc, new_batch.indices) getattr(new_batch, isrc)[pos_new] = getattr(self, isrc)[pos_old] return new_batch def trace_headers(self, header, flatten=False): @@ -772,7 +769,13 @@ def drop_zero_traces(self, index, src, num_zero, all_comps_sorted=True, **kwargs if sorting: cols = [(INDEX_UID, src), (sorting, '')] - sorted_index_df = self.index.get_df([index])[cols].sort_values(sorting) + index_df = self.index.get_df([index]) + if cols[0] not in index_df.columns: + # Level 1 of the columns MultiIndex holds the name of common columns ('') at the first + # position, followed by the names of all columns that relate to specific sgy files. + raise ValueError('`src` should be one of the component names that Index was created with: {}' + ''.format(index_df.columns.levels[1][1:].values)) + sorted_index_df = index_df[cols].sort_values(sorting) order = np.argsort(sorted_index_df[cols[0]].values) return mask[order] return mask
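The final hunk leans on the layout of the index dataframe's column MultiIndex. A minimal sketch of that layout, with assumed header and component names, shows why `index_df.columns.levels[1][1:]` lists the valid component names: in this toy frame the empty label shared by common columns occupies the first position of level 1, and the component-specific labels follow.

import pandas as pd

cols = pd.MultiIndex.from_tuples([('TraceNumber', ''),
                                  ('TRACE_SEQUENCE_FILE', 'raw'),
                                  ('offset', '')])
df = pd.DataFrame([[1, 10, 300]], columns=cols)
print(df.columns.levels[1])      # Index(['', 'raw'], dtype='object')
print(df.columns.levels[1][1:])  # Index(['raw'], dtype='object') -> component names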