diff --git a/seisflows/optimize/gradient.py b/seisflows/optimize/gradient.py index 72678143..a8da42d3 100644 --- a/seisflows/optimize/gradient.py +++ b/seisflows/optimize/gradient.py @@ -33,7 +33,7 @@ from seisflows.tools import msg, unix from seisflows.tools.config import Dict from seisflows.tools.math import angle, dot -from seisflows.tools.specfem import Model +from seisflows.tools.model import Model from seisflows.plugins import line_search as line_search_dir diff --git a/seisflows/seisflows.py b/seisflows/seisflows.py index b7a74995..a2fa2cba 100755 --- a/seisflows/seisflows.py +++ b/seisflows/seisflows.py @@ -12,23 +12,19 @@ - Write a new function within the SeisFlows class - Add a new subparser with optional arguments to sfparser() - Add subparser to subparser dict at the end of sfparser() + In-function import statements are used to reduce call-time for simpler fx's """ import os import sys -# import warnings import argparse -# import traceback -# from glob import glob -# from IPython import embed -# from obspy import Stream, read +from glob import glob from inspect import getmro -# -# from seisflows import logger, ROOT_DIR, NAMES -# from seisflows.preprocess.default import Default -# from seisflows.tools import unix, msg -# from seisflows.tools.config import load_yaml, custom_import, import_seisflows -# from seisflows.tools.specfem import (getpar, setpar, getpar_vel_model, -# setpar_vel_model, Model) +from seisflows import logger, ROOT_DIR, NAMES +from seisflows.tools import unix, msg +from seisflows.tools.config import load_yaml, custom_import, import_seisflows +from seisflows.tools.specfem import (getpar, setpar, getpar_vel_model, + setpar_vel_model) + def sfparser(): @@ -521,6 +517,8 @@ def configure(self, absolute_paths=False, **kwargs): else if False, use path names relative to the working directory. Defaults to False, uses relative paths. """ + from traceback import format_exc + print("configuring SeisFlows parameter file") def split_module_docstring(mod, idx): @@ -600,7 +598,7 @@ def split_module_docstring(mod, idx): logger.critical( msg.cli(text="seisflows configure traceback", header="error") ) - print(traceback.format_exc()) + print(format_exc()) raise else: unix.rm(temp_par_file) @@ -712,6 +710,7 @@ def clean(self, force=False, **kwargs): :param force: ignore the warning check that precedes the clean() function, useful if you don't want any input messages popping up """ + # Check if the filepaths exist if not os.path.exists(self._args.parameter_file): print(msg.cli(f"SeisFlows parameter file not found: " @@ -750,6 +749,8 @@ def debug(self, **kwargs): interactive environment allowing exploration of the package space. Does not allow stepping through of code (not a breakpoint). """ + from IPython import embed + workflow = import_seisflows(workdir=self._args.workdir, parameter_file=self._args.parameter_file) @@ -822,8 +823,6 @@ def sempar(self, parameter, value=None, skip_print=False, :param skip_print: skip the print statement which is typically sent to stdout after changing parameters. """ - from seisflows.tools.specfem import getpar, setpar, getpar_vel_model - if not os.path.exists(par_file): sys.exit(f"\n\tparameter file '{par_file}' does not exist\n") if parameter is None: @@ -897,7 +896,7 @@ def par(self, parameter, value=None, skip_print=False, **kwargs): :param skip_print: skip the print statement which is typically sent to stdout after changing parameters. """ - from seisflows.tools.specfem import getpar + from warnings import warn if not os.path.exists(self._args.parameter_file): sys.exit(f"\n\tparameter file '{self._args.parameter_file}' " @@ -910,8 +909,7 @@ def par(self, parameter, value=None, skip_print=False, **kwargs): # SeisFlows parameter file dictates upper-case parameters parameter = parameter.upper() if isinstance(value, str) and value.lower() == "none": - warnings.warn("to set values to NoneType, use 'null' not 'none'", - UserWarning) + warn("to set values NoneType, use 'null' not 'none'", UserWarning) # Use the specfem tool to grab related information try: @@ -1055,6 +1053,9 @@ def plotst(self, fids, data_format="ASCII", savefig=None, **kwargs): :param savefig: full path and filename to save the output figure. If NoneType, will not save the figure """ + from obspy import Stream + from seisflows.preprocess.default import Default + # Take advantage of the Default Preprocessing module's read() function plotter = Default(data_format=data_format) assert(data_format.upper() in plotter._acceptable_data_formats), \ @@ -1082,6 +1083,8 @@ def plot2d(self, name=None, parameter=None, cmap=None, savefig=None, :param savefig: optional name and path of filename to save figure to disk """ + from seisflows.tools.model import Model + # Figure out which models/gradients/kernels we can actually plot _, output_dir, _ = getpar(key="path_output", file=self._args.parameter_file, diff --git a/seisflows/solver/specfem3d_chinook.py b/seisflows/solver/specfem3d_chinook.py new file mode 100644 index 00000000..97c1bd6b --- /dev/null +++ b/seisflows/solver/specfem3d_chinook.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python3 +""" +This class provides utilities for the Seisflows solver interactions with +Specfem3D Cartesian, specifically to run on Chinook. +""" +import sys +import subprocess +from seisflows import logger +from seisflows.tools import unix, msg +from seisflows.solver.specfem3d import Specfem3D + + +class Specfem3D_Chinook(Specfem3D): + """ + Solver SPECFEM3D + ---------------- + SPECFEM3D-specific alterations to the base SPECFEM module + + Parameters + ---------- + + Paths + ----- + *** + """ + __doc__ = Specfem3D.__doc__ + __doc__ + + def _run_binary(self, executable, stdout="solver.log", with_mpi=True): + """ + Calls MPI solver executable to run solver binaries, used by individual + processes to run the solver on system. If the external solver returns a + non-zero exit code (failure), this function will return a negative + boolean. + + .. note:: + This function ASSUMES it is being run from a SPECFEM working + directory, i.e., that the executables are located in ./bin/ + + .. note:: + This is essentially an error-catching wrapper of subprocess.run() + + :type executable: str + :param executable: executable function to call. May or may not start + E.g., acceptable calls for the solver would './bin/xspecfem2D'. + Also accepts additional command line arguments such as: + 'xcombine_sem alpha_kernel kernel_paths...' + :type stdout: str + :param stdout: where to redirect stdout + :type with_mpi: bool + :param with_mpi: If `mpiexec` is given, use MPI to run the executable. + Some executables (e.g., combine_vol_data_vtk) must be run in + serial so this flag allows them to turn off MPI running. + :raises SystemExit: If external numerical solver return any failure + code while running + """ + # Executable may come with additional sub arguments, we only need to + # check that the actually executable exists + if not unix.which(executable.split(" ")[0]): + logger.critical(msg.cli(f"executable '{executable}' does not exist", + header="external solver error", border="=")) + sys.exit(-1) + + # Prepend with `mpiexec` if we are running with MPI + # looks something like: `mpirun -n 4 ./bin/xspecfem2d` + if self._mpiexec and with_mpi: + executable = f"{self._mpiexec} -n {self.nproc} {executable}" + logger.debug(f"running executable with cmd: '{executable}'") + + try: + with open(stdout, "w") as f: + subprocess.run(executable, shell=True, check=True, stdout=f, + stderr=f) + except (subprocess.CalledProcessError, OSError) as e: + logger.critical( + msg.cli("The external numerical solver has returned a " + "nonzero exit code (failure). Consider stopping any " + "currently running jobs to avoid wasted " + "computational resources. Check 'scratch/solver/" + f"mainsolver/{stdout}' for the solvers stdout log " + "message. The failing command and error message are:", + items=[f"exc: {executable}", f"err: {e}"], + header="external solver error", + border="=") + ) + sys.exit(-1) \ No newline at end of file diff --git a/seisflows/tests/test_optimize.py b/seisflows/tests/test_optimize.py index 9f1c7078..cf157cda 100644 --- a/seisflows/tests/test_optimize.py +++ b/seisflows/tests/test_optimize.py @@ -7,7 +7,7 @@ import numpy as np import matplotlib.pyplot as plt from seisflows.tools.config import Dict -from seisflows.tools.specfem import Model +from seisflows.tools.model import Model from seisflows.tools.math import angle from seisflows.optimize.gradient import Gradient from seisflows.optimize.LBFGS import LBFGS diff --git a/seisflows/tests/test_tools.py b/seisflows/tests/test_tools.py index f651b8e9..01c413c0 100644 --- a/seisflows/tests/test_tools.py +++ b/seisflows/tests/test_tools.py @@ -7,7 +7,7 @@ from glob import glob from seisflows import ROOT_DIR from seisflows.tools.config import Dict -from seisflows.tools.specfem import Model +from seisflows.tools.model import Model from seisflows.tools.config import custom_import diff --git a/seisflows/tools/model.py b/seisflows/tools/model.py index e69de29b..af75b811 100644 --- a/seisflows/tools/model.py +++ b/seisflows/tools/model.py @@ -0,0 +1,606 @@ +#!/usr/bin/env python3 +""" +A Model class for interfacing with SPECFEM velocity models, used to read +models into memory, manipulate and write to disk. Also contains plotting +functions for SPECFEM2D models +""" +import os +import numpy as np +import matplotlib.pyplot as plt +from copy import deepcopy +from glob import glob +from seisflows import logger +from seisflows.tools import unix +from seisflows.tools.config import Dict +from seisflows.tools.math import poissons_ratio +from seisflows.tools.graphics import plot_2d_image +from seisflows.tools.specfem import read_fortran_binary, write_fortran_binary + + +class Model: + """ + A container for reading, storing and manipulating model/gradient/kernel + parameters from SPECFEM2D/3D/3D_GLOBE. + Stores metadata information alongside model data allowing models to be + converted to back and forth between vector representations required by the + optimization library. + Also contains utility functions to read/write itself so that models can be + saved alongside their metadata. + """ + # Dictate the parameters that Model can handle, which does not cover all + # files created by SPECFEM, which includes things like 'ibool', 'info' etc. + acceptable_parameters = ["vp", "vs", "rho", + "vpv", "vph", "vsv", "vsh", "eta"] + acceptable_parameters.extend([f"{_}_kernel" for _ in acceptable_parameters]) + + def __init__(self, path=None, fmt="", parameters=None): + """ + Model only needs path to model to determine model parameters. Format + `fmt` can be provided by the user or guessed based on available file + formats + + .. note:: + The `vector` representation is based completely on the `model` + attribute. In order to update the model based on vector manipulation + you must use the update() function. + + :type path: str + :param path: path to SPECFEM model/kernel/gradient files + :type fmt: str + :param fmt: expected format of the files (e.g., '.bin'), if None, will + attempt to guess based on the file extensions found in `path` + Available formats are: .bin, .dat + """ + self.path = path + self.fmt = fmt + self.model = None + self.coordinates = None + self._parameters = parameters + self._ngll = None + self._nproc = None + + # Load an existing model if a valid path is given + if self.path and os.path.exists(path): + # Read existing model from a previously saved .npz file + if os.path.splitext(path)[-1] == ".npz": + self.model, self.coordinates, self._ngll, self.fmt = \ + self.load(file=self.path) + _first_key = list(self.model.keys())[0] + self._nproc = len(self.model[_first_key]) + # Read a SPECFEM model from its native output files + else: + if not self.fmt: + self.fmt = self._guess_file_format() + self._nproc, self.available_parameters = \ + self._get_nproc_parameters() + self.model = self.read(parameters=parameters) + # Coordinates are only useful for SPECFEM2D models + try: + self.coordinates = self.read_coordinates_specfem2d() + except AssertionError: + pass + + # .sorted() enforces parameter order every time, otherwise things + # can get screwy if keys returns different each time + self._parameters = sorted(self.model.keys()) + + @staticmethod + def fnfmt(i="*", val="*", ext="*"): + """ + Expected SPECFEM filename format with some checks to ensure that + wildcards and numbers are accepted. An example filename is: + 'proc000001_vs.bin' + + :type i: int or str + :param i: processor number or wildcard. If given as an integer, will be + converted to a 6 digit zero-leading value to match SPECFEM format + :type val: str + :param val: parameter value (e.g., 'vs') or wildcard + :type ext: str + :param ext: the file format (e.g., '.bin'). If NOT preceded by a '.' + will have one prepended + :rtype: str + :return: filename formatter for use in model manipulation + """ + if not ext.startswith("."): + ext = f".{ext}" + if isinstance(i, int): + filename_format = f"proc{i:0>6}_{val}{ext}" + else: + filename_format = f"proc{i}_{val}{ext}" + return filename_format + + @property + def parameters(self): + """ + Returns a list of parameters which defines the model. + """ + if not self._parameters: + self._parameters = list(self.model.keys()) + return self._parameters + + @property + def ngll(self): + """ + Provide the number of GLL (Gauss Lobatto Legendre) points per processor + chunk. Access hidden attribute `_ngll` in the case that the model + is very large, we only want to count the GLL points once. + + :rtype: list of float + :return: each float represents the number of GLL points for the chunk + number that corresponds to its index + """ + if not self._ngll: + self._update_ngll_from_model() + return self._ngll + + def _update_ngll_from_model(self): + """Convenience function to count NGLL points as length of data arrays + for each parameter processor chunk""" + self._ngll = [] + for proc in self.model[self.parameters[0]]: + self._ngll.append(len(proc)) + + @property + def nproc(self): + """ + Returns the number of processors that define the model/gradient/kernel. + + :rtype: int + :return: number of processors that define the model + """ + if not self._nproc: + self._nproc = len(self.model[self.parameters[0]]) + return self._nproc + + @property + def vector(self): + """ + Conveience property to access the merge() function which creates a + linear vector defining all model parameters + + :rtype: np.array + :return: a linear vector of all model parameters + """ + try: + return self.merge() + except TypeError as e: + raise TypeError("Model cannot merge files into continous " + "vector") from e + + def copy(self): + """Returns a deep copy of self so that models can be transferred""" + return deepcopy(self) + + def read(self, parameters=None): + """ + Utility function to load in SPECFEM models/kernels/gradients saved in + various formats. Will try to guess format of model. Assumes that models + are saved using the following filename format: + + proc{num}_{val}.{format} where `num` is usually a 6 digit number + representing the processor number (e.g., 000000), `val` is the parameter + value of the model/kernel/gradient and `format` is the format of the + file (e.g., bin) + + :type parameters: list of str + :param parameters: unique parameters to load model for, if None will load + all available parameters found in `path` + :rtype: Dict of np arrays + :return: Dictionary where keys correspond to model parameters and values + are vectors (np.arrays) representing the model + """ + if parameters is None: + parameters = self.available_parameters + else: + assert (set(parameters).union(set(self.available_parameters))), ( + f"user-chosen parameters not in available: " + f"{self.available_parameters}" + ) + + # Pick the correct read function based on the file format + load_fx = {".bin": self._read_model_fortran_binary, + ".dat": self._read_model_ascii, + ".adios": self._read_model_adios # TODO Check if this okay + }[self.fmt] + + # Create a dictionary object containing all parameters and their models + parameter_dict = Dict({key: [] for key in parameters}) + for parameter in parameters: + parameter_dict[parameter] = load_fx(parameter=parameter) + + return parameter_dict + + def read_coordinates_specfem2d(self): + """ + Attempt to read coordinate files from the given model definition. + This is only really useful for SPECFEM2D, where we can plot the + model, kernel and gradient using matplotlib. + + .. warning + This will NOT work for SPECEFM3D. When you get to 3D, the + coordinates don't match up one-to-one with the model values so you + need external viewing software (e.g., ParaView) to plot. + + :rtype: Dict + :return: a dictioanary with the X and Z coordinates read in from + a SPECFEM2D model, if applicable + """ + coordinates = {"x": [], "z": []} + if self.fmt == ".bin": + coordinates["x"] = self._read_model_fortran_binary(parameter="x") + coordinates["z"] = self._read_model_fortran_binary(parameter="z") + elif self.fmt == ".dat": + fids = glob(os.path.join(self.path, + self.fnfmt(val="*", ext=".dat"))) + for fid in sorted(fids): + coordinates["x"].append(np.loadtxt(fid).T[:, 0]) + coordinates["z"].append(np.loadtxt(fid).T[:, 0]) + + # Internal check for parameter validity by checking length of coord + # against length of model. If they're not the same, then it's not + # useful to store coordinates because they can't be used for plotting + assert (len(coordinates["x"]) == len(coordinates["z"])), \ + f"coordinate arrays do not match in length" + # Assuming all model parameters have the same length + assert (len(coordinates["x"]) == + len(self.model[list(self.model.keys())[0]])), \ + f"length of coordinates array does not match model length" + + return coordinates + + def merge(self, parameter=None): + """ + Convert dictionary representation `model` to vector representation `m` + where all parameters and processors are stored as a single 1D vector. + This vector representation is used by the optimization library during + model perturbation. + + :type parameter: str + :param parameter: single parameter to retrieve model vector from, + otherwise returns all parameters merged into single vector + :rtype: np.array + :return: vector representation of the model + """ + m = np.array([]) + if parameter is None: + parameters = self.parameters + else: + parameters = [parameter] + + for parameter in parameters: + for iproc in range(self.nproc): + m = np.append(m, self.model[parameter][iproc]) + + return m + + def write(self, path, fmt=None): + """ + Save a SPECFEM model/gradient/kernel vector loaded into memory back to + disk in the appropriate format expected by SPECFEM + """ + unix.mkdir(path) + if fmt is None: + assert (self.fmt is not None), f"must specifiy model format: `fmt`" + fmt = self.fmt + + # Pick the correct read function based on the file format + save_fx = {".bin": self._write_model_fortran_binary, + # ".dat": _write_model_ascii, + # ".adios": _write_model_adios # TODO Check if right + }[fmt] + + save_fx(path=path) + + def split(self, vector=None): + """ + Converts internal vector representation `m` to dictionary representation + `model`. Does this by separating the vector based on how it was + constructed, parameter-wise and processor-wise + + :type vector: np.array + :param vector: allow Model to split an input vector. If none given, + will split the internal vector representation + :rtype: Dict of np.array + :return: dictionary of model parameters split up by number of processors + """ + if vector is None: + vector = self.vector + + model = Dict({key: [] for key in self.parameters}) + for idim, key in enumerate(self.parameters): + for iproc in range(self.nproc): + imin = sum(self.ngll) * idim + sum(self.ngll[:iproc]) + imax = sum(self.ngll) * idim + sum(self.ngll[:iproc + 1]) + model[key].extend([vector[imin:imax]]) + + model[key] = np.array(model[key]) + return model + + def check(self, min_pr=-1., max_pr=0.5): + """ + Checks parameters in the model. If Vs and Vp present, checks poissons + ratio. Checks for negative velocity values. And prints out model + min/max values + """ + if "vs" in self.parameters and "vp" in self.parameters: + pr = poissons_ratio(vp=self.merge(parameter="vp"), + vs=self.merge(parameter="vs")) + if pr.min() < 0: + logger.warning("minimum poisson's ratio is negative") + if pr.max() < min_pr: + logger.warning(f"maximum poisson's ratio out of bounds: " + f"{pr.max():.2f} > {max_pr}") + if pr.min() > max_pr: + logger.warning(f"minimum poisson's ratio out of bounds: " + f"{pr.min():.2f} < {min_pr}") + + if "vs" in self.model and np.hstack(self.model.vs).min() < 0: + logger.warning(f"Vs minimum is negative {self.model.vs.min()}") + + if "vp" in self.model and np.hstack(self.model.vp).min() < 0: + logger.warning(f"Vp minimum is negative {self.model.vp.min()}") + + # Tell the User min and max values of the updated model + for key, vals in self.model.items(): + min_val = np.hstack(vals).min() + max_val = np.hstack(vals).max() + # Choose formatter based on the magnitude of the value + if min_val < 1 or max_val > 1E4: + parts = f"{min_val:.2E} <= {key} <= {max_val:.2E}" + else: + parts = f"{min_val:.2f} <= {key} <= {max_val:.2f}" + logger.info(parts) + + def save(self, path): + """ + Save instance attributes (model, vector, metadata) to disk as an + .npz array so that it can be loaded in at a later time for future use + """ + model = self.split() + if self.coordinates: + # Incase we have model parameters called 'x' or 'z', rename for save + model["x_coord"] = self.coordinates["x"] + model["z_coord"] = self.coordinates["z"] + + np.savez(file=path, fmt=self.fmt, **model) + + def load(self, file): + """ + Load in a previously saved .npz file containing model information + and re-create a Model instance matching the one that was `save`d + + :type file: str + :param file: .npz file to load data from. Must have been created by + Model.save() + :rtype: tuple (Dict, list, str) + :return: (Model Dictionary, ngll points for each slice, file format) + """ + model = Dict() + coords = Dict() + ngll = [] + data = np.load(file=file) + for i, key in enumerate(data.files): + if key == "fmt": + continue + # Special case where we are using SPECFEM2D and carry around coords + elif "coord" in key: + coords[key[0]] = data[key] # drop '_coord' suffix from `save` + else: + model[key] = data[key] + # Assign the number of GLL points per slice. Only needs to happen + # once because all model values should have same points/slice + if not ngll: + for array in model[key]: + ngll.append(len(array)) + + return model, coords, ngll, str(data["fmt"]) + + def update(self, model=None, vector=None): + """ + Update internal model/vector defitions. Because these two quantities + are tied to one another, updating one will update the other. This + function simply makes that easier. + """ + if model is not None: + self.model = model + elif vector is not None: + self.model = self.split(vector=vector) + + def plot2d(self, parameter, cmap=None, show=True, title="", save=None): + """ + Plot internal model parameters as a 2D image plot. + + .. warning:: + This is only available for SPECFEM2D models. SPECFEM3D model + coordinates do not match the model vectors (because the grids are + irregular) and cannot be visualized like this. + + :type parameter: str + :param parameter: chosen internal parameter value to plot. + :type cmap: str + :param cmap: colormap which match available matplotlib colormap. + If None, will choose default colormap based on parameter choice. + :type show: bool + :param show: show the figure after plotting + :type title: str + :param title: optional title to prepend to some values that are + provided by default. Useful for adding information about iteration, + step count etc. + :type save: str + :param save: if not None, full path to figure to save the output image + """ + assert (parameter in self._parameters), \ + f"chosen `parameter` must be in {self._parameters}" + + assert (self.coordinates is not None), ( + f"`plot2d` function requires model coordinates which are only " + f"available for solver SPECFEM2D" + ) + + # Choose default colormap based on parameter values + if cmap is None: + if "kernel" in parameter: + cmap = "seismic_r" + zero_midpoint = True + else: + cmap = "Spectral" + zero_midpoint = False + + # 'Merge' the coordinate matrices to get a vector representation + x, z = np.array([]), np.array([]) + for iproc in range(self.nproc): + x = np.append(x, self.coordinates["x"][iproc]) + z = np.append(z, self.coordinates["z"][iproc]) + data = self.merge(parameter=parameter) + + f, p, cbar = plot_2d_image(x=x, z=z, data=data, cmap=cmap, + zero_midpoint=zero_midpoint) + + # Set some figure labels based on information we know here + ax = plt.gca() + ax.set_xlabel("X [m]") + ax.set_ylabel("Z [m]") + # Allow User to title the figure, e.g., with iteration, step count etc. + _title = (f"{parameter.title()}_min = {data.min()};\n" + f"{parameter.title()}_max = {data.max()};\n" + f"{parameter.title()}_mean = {data.mean()}") + if title: + _title = f"{title}\n{_title}" + ax.set_title(_title) + cbar.ax.set_ylabel(parameter.title(), rotation=270, labelpad=15) + + if save: + plt.savefig(save) + if show: + plt.show() + + def _get_nproc_parameters(self): + """ + Get the number of processors and the available parameters from a list of + output SPECFEM model files. + + :rtype: tuple (int, list) + :return: (number of processors, list of available parameters in dir) + """ + fids = glob(os.path.join(self.path, self.fnfmt(val="*", ext=self.fmt))) + fids = [os.path.basename(_) for _ in fids] # drop full path + fids = [os.path.splitext(_)[0] for _ in fids] # drop extension + + if self.fmt == ".bin": + avail_par = list(set(["_".join(_.split("_")[1:]) for _ in fids])) + nproc = len(glob(os.path.join( + self.path, self.fnfmt(val=avail_par[0], ext=self.fmt))) + ) + elif self.fmt == ".dat": + # e.g., 'proc000000_rho_vp_vs' + _, *avail_par = fids[0].split("_") + nproc = len(fids) + 1 + else: + raise NotImplementedError(f"{self.fmt} is not yet supported by " + f"SeisFlows") + + # Remove any parameters not accepted by Model + avail_par = set(avail_par).intersection(set(self.acceptable_parameters)) + + return nproc, list(avail_par) + + def _guess_file_format(self): + """ + Guess the file format of model/kernel/gradient files if none provided by + the user. Does so by checking file formats against formats expected from + SPECFEM2D/3D/3D_GLOBE + + :rtype: str + :return: file format suffix with a leading '.' e.g., '.bin' + """ + acceptable_formats = {".bin", ".dat"} + + files = glob(os.path.join(self.path, "*")) + suffixes = set([os.path.splitext(_)[1] for _ in files]) + fmt = acceptable_formats.intersection(suffixes) + assert (len(fmt) == 1), ( + f"cannot guess model format, multiple matching acceptable formats " + f"found: {list(suffixes)}" + ) + return list(fmt)[0] # pulling single entry from set + + def _read_model_fortran_binary(self, parameter): + """ + Load Fortran binary models into disk. This is the preferred model format + for SeisFlows <-> SPECFEM interaction + + :type parameter: str + :param parameter: chosen parameter to load model for + :rtype: np.array + :return: vector of model values for given `parameter` + """ + array = [] + fids = glob(os.path.join( + self.path, self.fnfmt(val=parameter, ext=".bin")) + ) + for fid in sorted(fids): # make sure were going in numerical order + array.append(read_fortran_binary(fid)) + + # !!! Causes a visible deprecation warning from NumPy but setting + # !!! array type as 'object' causes problems with pickling and + # !!! merging arrays + array = np.array(array) + + return array + + def _read_model_adios(self, parameter): + """ + Load ADIOS models into disk + + :type parameter: str + :param parameter: chosen parameter to load model for + :rtype: np.array + :return: vector of model values for given `parameter` + """ + raise NotImplementedError("ADIOS file formats are not currently " + "implemented into SeisFlows") + + def _read_model_ascii(self, parameter): + """ + Load ASCII SPECFEM2D models into disk. ASCII models are generally saved + all in a single file with all parameters together as a N column ASCII file + where columns 1 and 2 are the coordinates of the mesh, and the remainder + columns are data corresponding to the filenames + e.g., proc000000_rho_vp_vs.dat, rho is column 3, vp is 4 etc. + + :type parameter: str + :param parameter: chosen parameter to load model for + :rtype: np.array + :return: vector of model values for given `parameter` + """ + fids = glob(os.path.join(self.path, self.fnfmt(val="*", ext=".dat"))) + _, *available_parameters = fids[0].split("_") + assert (parameter in available_parameters), ( + f"{parameter} not available for ASCII model" + ) + # +2 because first 2 columns are the X and Z coordinates in the mesh + array = [] + column_idx = available_parameters.index(parameter) + 2 + for fid in sorted(fids): + array.append(np.loadtxt(fid).T[:, column_idx]) + + return np.array(array) + + def _write_model_fortran_binary(self, path): + """ + Save a SPECFEM model back to Fortran binary format. + Data are written as single precision floating point numbers + + .. note:: + FORTRAN unformatted binaries are bounded by an INT*4 byte count. + This function mimics that behavior by tacking on the boundary data + as 'int32' at the top and bottom of the data array. + https://docs.oracle.com/cd/E19957-01/805-4939/6j4m0vnc4/index.html + """ + for parameter in self.parameters: + for i, data in enumerate(self.model[parameter]): + filename = self.fnfmt(i=i, val=parameter, ext=".bin") + filepath = os.path.join(path, filename) + + write_fortran_binary(arr=data, filename=filepath) \ No newline at end of file diff --git a/seisflows/tools/specfem.py b/seisflows/tools/specfem.py index 14518c8e..fe1dd9ea 100644 --- a/seisflows/tools/specfem.py +++ b/seisflows/tools/specfem.py @@ -3,604 +3,10 @@ i.e., SPECFEM2D/3D/3D_GLOBE """ import os -import matplotlib.pyplot as plt import numpy as np -from copy import deepcopy from glob import glob from seisflows import logger -from seisflows.tools.config import Dict -from seisflows.tools import unix, msg -from seisflows.tools.math import poissons_ratio -from seisflows.tools.graphics import plot_2d_image - - -class Model: - """ - A container for reading, storing and manipulating model/gradient/kernel - parameters from SPECFEM2D/3D/3D_GLOBE. - Stores metadata information alongside model data allowing models to be - converted to back and forth between vector representations required by the - optimization library. - Also contains utility functions to read/write itself so that models can be - saved alongside their metadata. - """ - # Dictate the parameters that Model can handle, which does not cover all - # files created by SPECFEM, which includes things like 'ibool', 'info' etc. - acceptable_parameters = ["vp", "vs", "rho", - "vpv", "vph", "vsv", "vsh", "eta"] - acceptable_parameters.extend([f"{_}_kernel" for _ in acceptable_parameters]) - - def __init__(self, path=None, fmt="", parameters=None): - """ - Model only needs path to model to determine model parameters. Format - `fmt` can be provided by the user or guessed based on available file - formats - - .. note:: - The `vector` representation is based completely on the `model` - attribute. In order to update the model based on vector manipulation - you must use the update() function. - - :type path: str - :param path: path to SPECFEM model/kernel/gradient files - :type fmt: str - :param fmt: expected format of the files (e.g., '.bin'), if None, will - attempt to guess based on the file extensions found in `path` - Available formats are: .bin, .dat - """ - self.path = path - self.fmt = fmt - self.model = None - self.coordinates = None - self._parameters = parameters - self._ngll = None - self._nproc = None - - # Load an existing model if a valid path is given - if self.path and os.path.exists(path): - # Read existing model from a previously saved .npz file - if os.path.splitext(path)[-1] == ".npz": - self.model, self.coordinates, self._ngll, self.fmt = \ - self.load(file=self.path) - _first_key = list(self.model.keys())[0] - self._nproc = len(self.model[_first_key]) - # Read a SPECFEM model from its native output files - else: - if not self.fmt: - self.fmt = self._guess_file_format() - self._nproc, self.available_parameters = \ - self._get_nproc_parameters() - self.model = self.read(parameters=parameters) - # Coordinates are only useful for SPECFEM2D models - try: - self.coordinates = self.read_coordinates_specfem2d() - except AssertionError: - pass - - # .sorted() enforces parameter order every time, otherwise things - # can get screwy if keys returns different each time - self._parameters = sorted(self.model.keys()) - - @staticmethod - def fnfmt(i="*", val="*", ext="*"): - """ - Expected SPECFEM filename format with some checks to ensure that - wildcards and numbers are accepted. An example filename is: - 'proc000001_vs.bin' - - :type i: int or str - :param i: processor number or wildcard. If given as an integer, will be - converted to a 6 digit zero-leading value to match SPECFEM format - :type val: str - :param val: parameter value (e.g., 'vs') or wildcard - :type ext: str - :param ext: the file format (e.g., '.bin'). If NOT preceded by a '.' - will have one prepended - :rtype: str - :return: filename formatter for use in model manipulation - """ - if not ext.startswith("."): - ext = f".{ext}" - if isinstance(i, int): - filename_format = f"proc{i:0>6}_{val}{ext}" - else: - filename_format = f"proc{i}_{val}{ext}" - return filename_format - - @property - def parameters(self): - """ - Returns a list of parameters which defines the model. - """ - if not self._parameters: - self._parameters = list(self.model.keys()) - return self._parameters - - @property - def ngll(self): - """ - Provide the number of GLL (Gauss Lobatto Legendre) points per processor - chunk. Access hidden attribute `_ngll` in the case that the model - is very large, we only want to count the GLL points once. - - :rtype: list of float - :return: each float represents the number of GLL points for the chunk - number that corresponds to its index - """ - if not self._ngll: - self._update_ngll_from_model() - return self._ngll - - def _update_ngll_from_model(self): - """Convenience function to count NGLL points as length of data arrays - for each parameter processor chunk""" - self._ngll = [] - for proc in self.model[self.parameters[0]]: - self._ngll.append(len(proc)) - - @property - def nproc(self): - """ - Returns the number of processors that define the model/gradient/kernel. - - :rtype: int - :return: number of processors that define the model - """ - if not self._nproc: - self._nproc = len(self.model[self.parameters[0]]) - return self._nproc - - @property - def vector(self): - """ - Conveience property to access the merge() function which creates a - linear vector defining all model parameters - - :rtype: np.array - :return: a linear vector of all model parameters - """ - try: - return self.merge() - except TypeError as e: - raise TypeError("Model cannot merge files into continous " - "vector") from e - - def copy(self): - """Returns a deep copy of self so that models can be transferred""" - return deepcopy(self) - - def read(self, parameters=None): - """ - Utility function to load in SPECFEM models/kernels/gradients saved in - various formats. Will try to guess format of model. Assumes that models - are saved using the following filename format: - - proc{num}_{val}.{format} where `num` is usually a 6 digit number - representing the processor number (e.g., 000000), `val` is the parameter - value of the model/kernel/gradient and `format` is the format of the - file (e.g., bin) - - :type parameters: list of str - :param parameters: unique parameters to load model for, if None will load - all available parameters found in `path` - :rtype: Dict of np arrays - :return: Dictionary where keys correspond to model parameters and values - are vectors (np.arrays) representing the model - """ - if parameters is None: - parameters = self.available_parameters - else: - assert(set(parameters).union(set(self.available_parameters))), ( - f"user-chosen parameters not in available: " - f"{self.available_parameters}" - ) - - # Pick the correct read function based on the file format - load_fx = {".bin": self._read_model_fortran_binary, - ".dat": self._read_model_ascii, - ".adios": self._read_model_adios # TODO Check if this okay - }[self.fmt] - - # Create a dictionary object containing all parameters and their models - parameter_dict = Dict({key: [] for key in parameters}) - for parameter in parameters: - parameter_dict[parameter] = load_fx(parameter=parameter) - - return parameter_dict - - def read_coordinates_specfem2d(self): - """ - Attempt to read coordinate files from the given model definition. - This is only really useful for SPECFEM2D, where we can plot the - model, kernel and gradient using matplotlib. - - .. warning - This will NOT work for SPECEFM3D. When you get to 3D, the - coordinates don't match up one-to-one with the model values so you - need external viewing software (e.g., ParaView) to plot. - - :rtype: Dict - :return: a dictioanary with the X and Z coordinates read in from - a SPECFEM2D model, if applicable - """ - coordinates = {"x": [], "z": []} - if self.fmt == ".bin": - coordinates["x"] = self._read_model_fortran_binary(parameter="x") - coordinates["z"] = self._read_model_fortran_binary(parameter="z") - elif self.fmt == ".dat": - fids = glob(os.path.join(self.path, - self.fnfmt(val="*", ext=".dat"))) - for fid in sorted(fids): - coordinates["x"].append(np.loadtxt(fid).T[:, 0]) - coordinates["z"].append(np.loadtxt(fid).T[:, 0]) - - # Internal check for parameter validity by checking length of coord - # against length of model. If they're not the same, then it's not - # useful to store coordinates because they can't be used for plotting - assert(len(coordinates["x"]) == len(coordinates["z"])), \ - f"coordinate arrays do not match in length" - # Assuming all model parameters have the same length - assert(len(coordinates["x"]) == - len(self.model[list(self.model.keys())[0]])), \ - f"length of coordinates array does not match model length" - - return coordinates - - def merge(self, parameter=None): - """ - Convert dictionary representation `model` to vector representation `m` - where all parameters and processors are stored as a single 1D vector. - This vector representation is used by the optimization library during - model perturbation. - - :type parameter: str - :param parameter: single parameter to retrieve model vector from, - otherwise returns all parameters merged into single vector - :rtype: np.array - :return: vector representation of the model - """ - m = np.array([]) - if parameter is None: - parameters = self.parameters - else: - parameters = [parameter] - - for parameter in parameters: - for iproc in range(self.nproc): - m = np.append(m, self.model[parameter][iproc]) - - return m - - def write(self, path, fmt=None): - """ - Save a SPECFEM model/gradient/kernel vector loaded into memory back to - disk in the appropriate format expected by SPECFEM - """ - unix.mkdir(path) - if fmt is None: - assert(self.fmt is not None), f"must specifiy model format: `fmt`" - fmt = self.fmt - - # Pick the correct read function based on the file format - save_fx = {".bin": self._write_model_fortran_binary, - # ".dat": _write_model_ascii, - # ".adios": _write_model_adios # TODO Check if right - }[fmt] - - save_fx(path=path) - - def split(self, vector=None): - """ - Converts internal vector representation `m` to dictionary representation - `model`. Does this by separating the vector based on how it was - constructed, parameter-wise and processor-wise - - :type vector: np.array - :param vector: allow Model to split an input vector. If none given, - will split the internal vector representation - :rtype: Dict of np.array - :return: dictionary of model parameters split up by number of processors - """ - if vector is None: - vector = self.vector - - model = Dict({key: [] for key in self.parameters}) - for idim, key in enumerate(self.parameters): - for iproc in range(self.nproc): - imin = sum(self.ngll) * idim + sum(self.ngll[:iproc]) - imax = sum(self.ngll) * idim + sum(self.ngll[:iproc + 1]) - model[key].extend([vector[imin:imax]]) - - model[key] = np.array(model[key]) - return model - - def check(self, min_pr=-1., max_pr=0.5): - """ - Checks parameters in the model. If Vs and Vp present, checks poissons - ratio. Checks for negative velocity values. And prints out model - min/max values - """ - if "vs" in self.parameters and "vp" in self.parameters: - pr = poissons_ratio(vp=self.merge(parameter="vp"), - vs=self.merge(parameter="vs")) - if pr.min() < 0: - logger.warning("minimum poisson's ratio is negative") - if pr.max() < min_pr: - logger.warning(f"maximum poisson's ratio out of bounds: " - f"{pr.max():.2f} > {max_pr}") - if pr.min() > max_pr: - logger.warning(f"minimum poisson's ratio out of bounds: " - f"{pr.min():.2f} < {min_pr}") - - if "vs" in self.model and np.hstack(self.model.vs).min() < 0: - logger.warning(f"Vs minimum is negative {self.model.vs.min()}") - - if "vp" in self.model and np.hstack(self.model.vp).min() < 0: - logger.warning(f"Vp minimum is negative {self.model.vp.min()}") - - # Tell the User min and max values of the updated model - for key, vals in self.model.items(): - min_val = np.hstack(vals).min() - max_val = np.hstack(vals).max() - # Choose formatter based on the magnitude of the value - if min_val < 1 or max_val> 1E4: - parts = f"{min_val:.2E} <= {key} <= {max_val:.2E}" - else: - parts = f"{min_val:.2f} <= {key} <= {max_val:.2f}" - logger.info(parts) - - def save(self, path): - """ - Save instance attributes (model, vector, metadata) to disk as an - .npz array so that it can be loaded in at a later time for future use - """ - model = self.split() - if self.coordinates: - # Incase we have model parameters called 'x' or 'z', rename for save - model["x_coord"] = self.coordinates["x"] - model["z_coord"] = self.coordinates["z"] - - np.savez(file=path, fmt=self.fmt, **model) - - def load(self, file): - """ - Load in a previously saved .npz file containing model information - and re-create a Model instance matching the one that was `save`d - - :type file: str - :param file: .npz file to load data from. Must have been created by - Model.save() - :rtype: tuple (Dict, list, str) - :return: (Model Dictionary, ngll points for each slice, file format) - """ - model = Dict() - coords = Dict() - ngll = [] - data = np.load(file=file) - for i, key in enumerate(data.files): - if key == "fmt": - continue - # Special case where we are using SPECFEM2D and carry around coords - elif "coord" in key: - coords[key[0]] = data[key] # drop '_coord' suffix from `save` - else: - model[key] = data[key] - # Assign the number of GLL points per slice. Only needs to happen - # once because all model values should have same points/slice - if not ngll: - for array in model[key]: - ngll.append(len(array)) - - return model, coords, ngll, str(data["fmt"]) - - def update(self, model=None, vector=None): - """ - Update internal model/vector defitions. Because these two quantities - are tied to one another, updating one will update the other. This - function simply makes that easier. - """ - if model is not None: - self.model = model - elif vector is not None: - self.model = self.split(vector=vector) - - def plot2d(self, parameter, cmap=None, show=True, title="", save=None): - """ - Plot internal model parameters as a 2D image plot. - - .. warning:: - This is only available for SPECFEM2D models. SPECFEM3D model - coordinates do not match the model vectors (because the grids are - irregular) and cannot be visualized like this. - - :type parameter: str - :param parameter: chosen internal parameter value to plot. - :type cmap: str - :param cmap: colormap which match available matplotlib colormap. - If None, will choose default colormap based on parameter choice. - :type show: bool - :param show: show the figure after plotting - :type title: str - :param title: optional title to prepend to some values that are - provided by default. Useful for adding information about iteration, - step count etc. - :type save: str - :param save: if not None, full path to figure to save the output image - """ - assert(parameter in self._parameters), \ - f"chosen `parameter` must be in {self._parameters}" - - assert(self.coordinates is not None), ( - f"`plot2d` function requires model coordinates which are only " - f"available for solver SPECFEM2D" - ) - - # Choose default colormap based on parameter values - if cmap is None: - if "kernel" in parameter: - cmap = "seismic_r" - zero_midpoint = True - else: - cmap = "Spectral" - zero_midpoint = False - - # 'Merge' the coordinate matrices to get a vector representation - x, z = np.array([]), np.array([]) - for iproc in range(self.nproc): - x = np.append(x, self.coordinates["x"][iproc]) - z = np.append(z, self.coordinates["z"][iproc]) - data = self.merge(parameter=parameter) - - f, p, cbar = plot_2d_image(x=x, z=z, data=data, cmap=cmap, - zero_midpoint=zero_midpoint) - - # Set some figure labels based on information we know here - ax = plt.gca() - ax.set_xlabel("X [m]") - ax.set_ylabel("Z [m]") - # Allow User to title the figure, e.g., with iteration, step count etc. - _title = (f"{parameter.title()}_min = {data.min()};\n" - f"{parameter.title()}_max = {data.max()};\n" - f"{parameter.title()}_mean = {data.mean()}") - if title: - _title = f"{title}\n{_title}" - ax.set_title(_title) - cbar.ax.set_ylabel(parameter.title(), rotation=270, labelpad=15) - - if save: - plt.savefig(save) - if show: - plt.show() - - def _get_nproc_parameters(self): - """ - Get the number of processors and the available parameters from a list of - output SPECFEM model files. - - :rtype: tuple (int, list) - :return: (number of processors, list of available parameters in dir) - """ - fids = glob(os.path.join(self.path, self.fnfmt(val="*", ext=self.fmt))) - fids = [os.path.basename(_) for _ in fids] # drop full path - fids = [os.path.splitext(_)[0] for _ in fids] # drop extension - - if self.fmt == ".bin": - avail_par = list(set(["_".join(_.split("_")[1:]) for _ in fids])) - nproc = len(glob(os.path.join( - self.path, self.fnfmt(val=avail_par[0], ext=self.fmt))) - ) - elif self.fmt == ".dat": - # e.g., 'proc000000_rho_vp_vs' - _, *avail_par = fids[0].split("_") - nproc = len(fids) + 1 - else: - raise NotImplementedError(f"{self.fmt} is not yet supported by " - f"SeisFlows") - - # Remove any parameters not accepted by Model - avail_par = set(avail_par).intersection(set(self.acceptable_parameters)) - - return nproc, list(avail_par) - - def _guess_file_format(self): - """ - Guess the file format of model/kernel/gradient files if none provided by - the user. Does so by checking file formats against formats expected from - SPECFEM2D/3D/3D_GLOBE - - :rtype: str - :return: file format suffix with a leading '.' e.g., '.bin' - """ - acceptable_formats = {".bin", ".dat"} - - files = glob(os.path.join(self.path, "*")) - suffixes = set([os.path.splitext(_)[1] for _ in files]) - fmt = acceptable_formats.intersection(suffixes) - assert (len(fmt) == 1), ( - f"cannot guess model format, multiple matching acceptable formats " - f"found: {list(suffixes)}" - ) - return list(fmt)[0] # pulling single entry from set - - def _read_model_fortran_binary(self, parameter): - """ - Load Fortran binary models into disk. This is the preferred model format - for SeisFlows <-> SPECFEM interaction - - :type parameter: str - :param parameter: chosen parameter to load model for - :rtype: np.array - :return: vector of model values for given `parameter` - """ - array = [] - fids = glob(os.path.join( - self.path, self.fnfmt(val=parameter, ext=".bin")) - ) - for fid in sorted(fids): # make sure were going in numerical order - array.append(read_fortran_binary(fid)) - - # !!! Causes a visible deprecation warning from NumPy but setting - # !!! array type as 'object' causes problems with pickling and - # !!! merging arrays - array = np.array(array) - - return array - - def _read_model_adios(self, parameter): - """ - Load ADIOS models into disk - - :type parameter: str - :param parameter: chosen parameter to load model for - :rtype: np.array - :return: vector of model values for given `parameter` - """ - raise NotImplementedError("ADIOS file formats are not currently " - "implemented into SeisFlows") - - def _read_model_ascii(self, parameter): - """ - Load ASCII SPECFEM2D models into disk. ASCII models are generally saved - all in a single file with all parameters together as a N column ASCII file - where columns 1 and 2 are the coordinates of the mesh, and the remainder - columns are data corresponding to the filenames - e.g., proc000000_rho_vp_vs.dat, rho is column 3, vp is 4 etc. - - :type parameter: str - :param parameter: chosen parameter to load model for - :rtype: np.array - :return: vector of model values for given `parameter` - """ - fids = glob(os.path.join(self.path, self.fnfmt(val="*", ext=".dat"))) - _, *available_parameters = fids[0].split("_") - assert(parameter in available_parameters), ( - f"{parameter} not available for ASCII model" - ) - # +2 because first 2 columns are the X and Z coordinates in the mesh - array = [] - column_idx = available_parameters.index(parameter) + 2 - for fid in sorted(fids): - array.append(np.loadtxt(fid).T[:, column_idx]) - - return np.array(array) - - def _write_model_fortran_binary(self, path): - """ - Save a SPECFEM model back to Fortran binary format. - Data are written as single precision floating point numbers - - .. note:: - FORTRAN unformatted binaries are bounded by an INT*4 byte count. - This function mimics that behavior by tacking on the boundary data - as 'int32' at the top and bottom of the data array. - https://docs.oracle.com/cd/E19957-01/805-4939/6j4m0vnc4/index.html - """ - for parameter in self.parameters: - for i, data in enumerate(self.model[parameter]): - filename = self.fnfmt(i=i, val=parameter, ext=".bin") - filepath = os.path.join(path, filename) - - write_fortran_binary(arr=data, filename=filepath) +from seisflows.tools import msg def check_source_names(path_specfem_data, source_prefix, ntask=None): diff --git a/seisflows/workflow/forward.py b/seisflows/workflow/forward.py index d480807b..ff01910d 100644 --- a/seisflows/workflow/forward.py +++ b/seisflows/workflow/forward.py @@ -11,7 +11,7 @@ from seisflows import logger from seisflows.tools import msg, unix from seisflows.tools.config import Dict -from seisflows.tools.specfem import Model +from seisflows.tools.model import Model class Forward: diff --git a/seisflows/workflow/inversion.py b/seisflows/workflow/inversion.py index 3adf1c4f..c2ab9326 100644 --- a/seisflows/workflow/inversion.py +++ b/seisflows/workflow/inversion.py @@ -25,7 +25,7 @@ from seisflows import logger from seisflows.workflow.migration import Migration from seisflows.tools import msg, unix -from seisflows.tools.specfem import Model +from seisflows.tools.model import Model class Inversion(Migration): diff --git a/seisflows/workflow/migration.py b/seisflows/workflow/migration.py index 25593c02..ccb894bb 100644 --- a/seisflows/workflow/migration.py +++ b/seisflows/workflow/migration.py @@ -21,7 +21,7 @@ from seisflows import logger from seisflows.tools import msg, unix -from seisflows.tools.specfem import Model +from seisflows.tools.model import Model from seisflows.workflow.forward import Forward