Commit af70cf4

add adjoint stations generator and plot histogram of measurements utils
1 parent 1c07427

7 files changed: 308 additions, 18 deletions
Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
#!/usr/bin/env python

"""
This script will generate the STATIONS_ADJOINT file from
measurements file and stations file(stations.json). The
STATIONS_ADJOINT will then be used in adjoint simulations.
"""
from __future__ import print_function, division, absolute_import
import os
import argparse
from pprint import pprint
from .utils import load_json
from pytomo3d.station.generate_adjoint_stations import \
    generate_adjoint_stations


def main():

    parser = argparse.ArgumentParser()
    parser.add_argument('-f', action='store', dest='path_file', required=True,
                        help="path file")
    parser.add_argument('-v', action='store_true', dest='verbose',
                        help="verbose flag")
    args = parser.parse_args()

    paths = load_json(args.path_file)

    print("Path information:")
    pprint(paths)

    # load stations
    station_file = paths["station_file"]
    stations = load_json(station_file)

    # load measurements
    measure_files = paths["measure_files"]
    measurements = {}
    for period, fn in measure_files.iteritems():
        measurements[period] = load_json(fn)

    outputfile = paths["output_file"]
    outputdir = os.path.dirname(outputfile)
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    generate_adjoint_stations(measurements, stations, outputfile)


if __name__ == "__main__":
    main()
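For reference, the path file passed to -f only needs the three keys the script reads: station_file, measure_files (a mapping from period band to measurement json) and output_file. A minimal sketch of how such a file could be written, with a made-up event name and directories that are not part of this commit:

import json

# Hypothetical path file for the adjoint-stations generator above; only the
# key names come from the script, the event name and directories are made up.
paths = {
    "station_file": "stations/C201001010000A.stations.json",
    "measure_files": {
        "17_40": "measure/C201001010000A.17_40.measure_adj.json",
        "40_100": "measure/C201001010000A.40_100.measure_adj.json",
        "90_250": "measure/C201001010000A.90_250.measure_adj.json"
    },
    "output_file": "adjoint_stations/STATIONS_ADJOINT"
}

with open("adjoint_stations.path.json", "w") as fh:
    json.dump(paths, fh, indent=2, sort_keys=True)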

pypaw/bins/generate_stations_asdf.py

Lines changed: 7 additions & 4 deletions
@@ -1,10 +1,13 @@
 #!/usr/bin/env python
 """
-Scripts that generate stations file from asdf file. If
-there are stations in waveforms, then a file `STATIONS_waveform`
-will be generated. Or if there are stations in AuxlilaryData,
+Scripts that generate stations file from asdf file.
+1) If there are stations in waveforms, then a file `STATIONS`
+will be generated.
+2) if there are stations in AuxlilaryData.AdjointSource,
 then a file `STATIONS_ADJOINT` will be generated.
 
+The output STATIONS file follows the format in SPECFEM3D_GLOBE.
+
 :copyright:
     Wenjie Lei (lei@princeton.edu), 2016
 :license:
@@ -20,7 +23,7 @@
 
 from pypaw.stations import extract_adjoint_stations
 from pypaw.stations import extract_waveform_stations
-from pypaw.stations import write_stations_file
+from pytomo3d.station.utils import write_stations_file
 
 
 def generate_waveform_stations(asdf, outputfn):

pypaw/stations.py

Lines changed: 0 additions & 13 deletions
@@ -12,19 +12,6 @@
 from __future__ import (print_function, division, absolute_import)
 import pyasdf
 from pytomo3d.station import extract_staxml_info
-import collections
-
-
-def write_stations_file(sta_dict, filename="STATIONS"):
-    """
-    Write station information out to a txt file(in SPECFEM FORMAT)
-    """
-    with open(filename, 'w') as fh:
-        od = collections.OrderedDict(sorted(sta_dict.items()))
-        for _sta_id, _sta in od.iteritems():
-            network, station = _sta_id.split(".")
-            fh.write("%-9s %5s %15.4f %12.4f %10.1f %6.1f\n"
-                     % (station, network, _sta[0], _sta[1], _sta[2], _sta[3]))
 
 
 def extract_station_info_from_asdf(asdf, verbose=False):
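The deleted helper now comes from pytomo3d instead (see the import change in generate_stations_asdf.py above). For context, a sketch of the line its format string writes, using illustrative station values that are not taken from this commit:

# Illustrative only: one line of a STATIONS file in the format used by the
# removed write_stations_file helper (station, network, then the four stored
# values, conventionally latitude, longitude, elevation and burial depth).
print("%-9s %5s %15.4f %12.4f %10.1f %6.1f"
      % ("ANMO", "IU", 34.9459, -106.4572, 1850.0, 100.0))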
Lines changed: 8 additions & 0 deletions
@@ -0,0 +1,8 @@
### Introduction

This script is used to plot histogram of measurements generated by
pyadjoint, including traveltime(dt) and amplitude(dlnA).

It is very important to monitor the histogram, such as the upper
bound, lower bound, mean and standard deviation values of the
measurements distributions during the inversion.
Lines changed: 72 additions & 0 deletions
@@ -0,0 +1,72 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Generate weights for each window based on the number of windows, location
of stations and receivers

:copyright:
    Wenjie Lei (lei@princeton.edu), 2016
:license:
    GNU Lesser General Public License, version 3 (LGPLv3)
    (http://www.gnu.org/licenses/lgpl-3.0.en.html)
"""
from __future__ import print_function, division
import os
import json
import argparse

# #############################
period_list = ["17_40", "40_100", "90_250"]

superbase = "/lustre/atlas/proj-shared/geo111/Wenjie/DATA_M16"
measurebase = os.path.join(superbase, "measure")
stationbase = os.path.join(superbase, "stations")
# #############################


def load_txt(txtfile):
    with open(txtfile, 'r') as fh:
        return [line.rstrip() for line in fh]


def check_file_exists(filename):
    if not os.path.exists(filename):
        raise ValueError("Missing file: %s" % filename)


def generate_json_paths(eventlist, outputfile, mtype=""):
    paths = {"input": {}, "outputdir": "./output%s" % mtype}

    for event in eventlist:
        event_info = {}
        stationfile = os.path.join(stationbase, "%s.stations.json" % event)
        check_file_exists(stationfile)
        period_info = {}
        for period in period_list:
            measure_file = \
                os.path.join(measurebase, "%s.%s.measure_adj.json%s"
                             % (event, period, mtype))
            check_file_exists(measure_file)
            period_info[period] = {"measure_file": measure_file}
        event_info = {"stationfile": stationfile,
                      "period_info": period_info}

        paths["input"][event] = event_info

    print("Output dir json file: ", outputfile)
    with open(outputfile, 'w') as f:
        json.dump(paths, f, indent=2, sort_keys=True)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', action='store', dest='eventlist_file',
                        required=True)
    args = parser.parse_args()

    eventlist = load_txt(args.eventlist_file)

    generate_json_paths(eventlist, "window_weight.path.json", mtype="")

    generate_json_paths(eventlist, "window_weight.filter.path.json",
                        mtype=".filter")
Lines changed: 169 additions & 0 deletions
@@ -0,0 +1,169 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Plot the histogram of measurements. All the input files
are specified in the path json file.

:copyright:
    Wenjie Lei (lei@princeton.edu), 2016
:license:
    GNU Lesser General Public License, version 3 (LGPLv3)
    (http://www.gnu.org/licenses/lgpl-3.0.en.html)
"""
from __future__ import print_function, division
import os
import json
import argparse
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt


def load_txt(txtfile):
    with open(txtfile, 'r') as fh:
        return [line.rstrip() for line in fh]


def load_json(fn):
    with open(fn) as fh:
        return json.load(fh)


def dump_json(content, fn):
    with open(fn, 'w') as fh:
        json.dump(content, fh, indent=2, sort_keys=True)


def check_file_exists(filename):
    if not os.path.exists(filename):
        raise ValueError("Missing file: %s" % filename)


def load_one_measurefile(measure_file):
    measure = load_json(measure_file)

    dt = {}
    dlna = {}
    for sta, stainfo in measure.iteritems():
        for chan, chaninfo in stainfo.iteritems():
            comp = chan.split(".")[-1]
            if comp not in dt:
                dt[comp] = []
            dt[comp].extend([m["dt"] for m in chaninfo])
            if comp not in dlna:
                dlna[comp] = []
            dlna[comp].extend([m["dlna"] for m in chaninfo])

    return dt, dlna


def update_overall(dict_one, dict_all, pb):
    if pb not in dict_all:
        dict_all[pb] = {}
    for comp in dict_one:
        if comp not in dict_all[pb]:
            dict_all[pb][comp] = []
        dict_all[pb][comp].extend(dict_one[comp])


def get_mean_and_std(dictv):
    mean = {}
    std = {}
    for pb, pbinfo in dictv.iteritems():
        mean[pb] = {}
        std[pb] = {}
        for comp, compinfo in pbinfo.iteritems():
            mean[pb][comp] = np.mean(compinfo)
            std[pb][comp] = np.std(compinfo)

    return mean, std


def stats_analysis(dts, dlnas, outputdir):
    dt_mean, dt_std = get_mean_and_std(dts)
    dlna_mean, dlna_std = get_mean_and_std(dlnas)

    log_content = {"dt": {"mean": dt_mean, "std": dt_std},
                   "dlna": {"mean": dlna_mean, "std": dlna_std}}

    outputfn = os.path.join(outputdir, "measure.log.json")
    print("log file: %s" % outputfn)
    dump_json(log_content, outputfn)


def load_measurements(inputs):
    dts = {}
    dlnas = {}
    for ev, evinfo in inputs.iteritems():
        for pb, pbinfo in evinfo["period_info"].iteritems():
            _dt, _dlna = load_one_measurefile(pbinfo["measure_file"])
            update_overall(_dt, dts, pb)
            update_overall(_dlna, dlnas, pb)

    return dts, dlnas


def plot_hist(data, figname=None):
    period_bands = ["17_40", "40_100", "90_250"]
    components = ["BHR", "BHT", "BHZ"]

    fig = plt.figure(figsize=(20, 20))

    irow = 0
    for pb in period_bands:
        icol = 0
        for comp in components:
            idx = irow * 3 + icol + 1
            plt.subplot(3, 3, idx)
            plt.hist(data[pb][comp], bins=30)
            mean = np.mean(data[pb][comp])
            std = np.std(data[pb][comp])
            xloc = plt.xlim()[0] + 0.05 * (plt.xlim()[1] - plt.xlim()[0])
            plt.text(xloc, plt.ylim()[1] * 0.9, "mean: %.4f" %
                     (mean))
            plt.text(xloc, plt.ylim()[1] * 0.85, "std: %.4f" %
                     (std))
            if icol == 0:
                plt.ylabel(pb)
            if irow == 2:
                plt.xlabel(comp)
            icol += 1
        irow += 1

    print("Save figure to: %s" % figname)
    plt.tight_layout()
    plt.savefig(figname)
    plt.close(fig)


def plot_measures(dts, dlnas, outputdir):

    figname = os.path.join(outputdir, "dt.histogram.pdf")
    plot_hist(dts, figname=figname)

    figname = os.path.join(outputdir, "dlna.histogram.pdf")
    plot_hist(dlnas, figname=figname)


def main(path):
    inputs = path["input"]
    outputdir = path["outputdir"]
    print("Number of events: %d" % len(inputs))
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    dts, dlnas = load_measurements(inputs)

    stats_analysis(dts, dlnas, outputdir)
    plot_measures(dts, dlnas, outputdir)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('-f', action='store', dest='path',
                        required=True)
    args = parser.parse_args()

    path = load_json(args.path)
    main(path)
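Assuming the two scripts above are saved as, say, generate_measure_paths.py and plot_measurements_histogram.py (placeholder names, since the filenames are not shown in this diff), a typical run would be:

    python generate_measure_paths.py -f eventlist.txt
    python plot_measurements_histogram.py -f window_weight.path.json

where eventlist.txt lists one event name per line; the second command writes dt.histogram.pdf, dlna.histogram.pdf and measure.log.json into the outputdir recorded in the path file.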

setup.py

Lines changed: 2 additions & 1 deletion
@@ -32,7 +32,8 @@ def run_tests(self):
         'pypaw-convert_adjsrcs_from_asdf=pypaw.bins.convert_adjsrcs_from_asdf:main', # NOQA
         'pypaw-convert_to_asdf=pypaw.bins.convert_to_asdf:main',
         'pypaw-convert_to_sac=pypaw.bins.convert_to_sac:main',
-        'pypaw-generate_stations_asdf=pypaw.bins.generate_stations_asdf:main'
+        'pypaw-generate_stations_asdf=pypaw.bins.generate_stations_asdf:main',
+        'pypaw-generate_adjoint_stations=pypaw.bins.generate_adjoint_stations:main'
     ]
 
 
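With the new console_scripts entry, the adjoint-stations generator can be invoked directly once the package is installed (the path file name below is a placeholder):

    pypaw-generate_adjoint_stations -f adjoint_stations.path.json -v

where -f points at a JSON path file with the station_file, measure_files and output_file keys shown earlier, and -v sets the verbose flag defined in the script's argument parser.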