From f37d4c59683d5875280036717de12837472b6786 Mon Sep 17 00:00:00 2001 From: Eve Kovacs Date: Wed, 11 Sep 2019 13:44:54 -0500 Subject: [PATCH 01/10] Add jackknife errors to angular correlation function; add some cosmetic options --- descqa/CorrelationsTwoPoint.py | 187 ++++++++++++++++--- descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml | 79 ++++++++ 2 files changed, 244 insertions(+), 22 deletions(-) create mode 100644 descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml diff --git a/descqa/CorrelationsTwoPoint.py b/descqa/CorrelationsTwoPoint.py index 101d7440..d7c077be 100644 --- a/descqa/CorrelationsTwoPoint.py +++ b/descqa/CorrelationsTwoPoint.py @@ -2,8 +2,11 @@ import os from collections import defaultdict import numpy as np +import re import scipy.special as scsp import treecorr +import healpy as hp +from sklearn.cluster import k_means from GCR import GCRQuery from .base import BaseValidationTest, TestResult @@ -58,9 +61,10 @@ def __init__(self, **kwargs): self.fig_xlabel = kwargs['fig_xlabel'] self.fig_ylabel = kwargs['fig_ylabel'] - self.fig_ylim = kwargs['fig_ylim'] + self.fig_ylim = kwargs.get('fig_ylim', None) self.fig_subplots_nrows, self.fig_subplots_ncols = kwargs.get('fig_subplots', (1, 1)) self.fig_subplot_groups = kwargs.get('fig_subplot_groups', [None]) + self.fig_xlim = kwargs.get('fig_xlim', None) self.treecorr_config = { 'min_sep': kwargs['min_sep'], @@ -70,6 +74,25 @@ def __init__(self, **kwargs): self.random_nside = kwargs.get('random_nside', 1024) self.random_mult = kwargs.get('random_mult', 3) + # jackknife errors + self.jackknife = kwargs.get('jackknife', False) + if self.jackknife: + self.N_jack = kwargs.get('N_jack', 30) + jackknife_quantities = kwargs.get('jackknife_quantities', + {'ra':['ra', 'ra_true'], 'dec':['dec', 'dec_true']}) + if 'ra' not in self.requested_columns or 'dec' not in self.requested_columns: + self.requested_columns.update(jackknife_quantities) + self.use_diagonal_only = kwargs.get('use_diagonal_only', True) + + self.r_validation_min = kwargs.get('r_validation_min', 1) + self.r_validation_max = kwargs.get('r_validation_max', 10) + + self.truncate_cat_name = kwargs.get('truncate_cat_name', False) + self.title_in_legend = kwargs.get('title_in_legend', False) + self.font_size = kwargs.get('font_size', 16) + self.legend_size = kwargs.get('legend_size', 10) + self.survey_label = kwargs.get('survey_label', '') + self.no_title = kwargs.get('no_title', False) @staticmethod def load_catalog_data(catalog_instance, requested_columns, test_samples): @@ -196,27 +219,65 @@ def plot_data_comparison(self, corr_data, catalog_name, output_dir): ax.loglog(self.validation_data[:, 0], self.validation_data[:, sample_data['data_col']], c=color, - label=sample_label) + label=' '.join([self.survey_label, sample_label])) if 'data_err_col' in sample_data: y1 = (self.validation_data[:, sample_data['data_col']] + self.validation_data[:, sample_data['data_err_col']]) y2 = (self.validation_data[:, sample_data['data_col']] - self.validation_data[:, sample_data['data_err_col']]) - y2[y2 <= 0] = self.fig_ylim[0]*0.9 - ax.fill_between(self.validation_data[:, 0], y1, y2, lw=0, color=color, alpha=0.2) - ax.errorbar(sample_corr[0], sample_corr[1], sample_corr[2], marker='o', ls='', c=color) + if self.fig_xlim is not None: + y2[y2 <= 0] = self.fig_ylim[0]*0.9 + ax.fill_between(self.validation_data[:, 0], y1, y2, lw=0, color=color, alpha=0.25) + ax.errorbar(sample_corr[0], sample_corr[1], sample_corr[2], + label=' '.join([catalog_name, sample_label]), + marker='o', ls='', c=color) - 
ax.legend(loc='best') - ax.set_xlabel(self.fig_xlabel) - ax.set_ylim(*self.fig_ylim) - ax.set_ylabel(self.fig_ylabel) - ax.set_title('{} vs. {}'.format(catalog_name, self.data_label), fontsize='medium') + self.decorate_plot(ax, catalog_name) fig.tight_layout() fig.savefig(os.path.join(output_dir, '{:s}.png'.format(self.test_name)), bbox_inches='tight') plt.close(fig) + def get_legend_title(self, test_samples, exclude='mstellar'): + legend_title = '' + filter_ids = list(set([k for v in test_samples.values() for k in v.keys() if exclude not in k])) + for filter_id in filter_ids: + legend_title = self.get_legend_subtitle(test_samples, filter_id=filter_id, legend_title=legend_title) + + return legend_title + + + @staticmethod + def get_legend_subtitle(test_samples, filter_id='z', legend_title=''): + legend_title = legend_title if len(legend_title) == 0 else '{}; '.format(legend_title) + min_values = [test_samples[k][filter_id].get('min', None) for k in test_samples if test_samples[k].get(filter_id, None) is not None] + max_values = [test_samples[k][filter_id].get('max', None) for k in test_samples if test_samples[k].get(filter_id, None) is not None] + min_title = '' + if len(min_values) > 0 and any([k != None for k in min_values]): + min_title = '{} < {}'.format(min([k for k in min_values if k is not None]), filter_id) + max_title = '' + if len(max_values) > 0 and any([k != None for k in max_values]): + max_values = [k for k in max_values if k is not None] + max_title = '${} < {}$'.format(filter_id, max(max_values)) if len(min_title) == 0 else '${} < {}$'.format(min_title, max(max_values)) + + return legend_title + max_title + + + def decorate_plot(self, ax, catalog_name): + title = '{} vs. {}'.format(catalog_name, self.data_label) + lgnd_title = self.get_legend_title(self.test_samples) if self.title_in_legend else None + ax.legend(loc='lower left', fontsize=self.legend_size, title=lgnd_title) + ax.set_xlabel(self.fig_xlabel, size=self.font_size) + if self.fig_ylim is not None: + ax.set_ylim(*self.fig_ylim) + if self.fig_xlim is not None: + ax.set_xlim(*self.fig_xlim) + ax.set_ylabel(self.fig_ylabel, size=self.font_size) + if not self.no_title: + ax.set_title(title, fontsize='medium') + + @staticmethod def score_and_test(corr_data): # pylint: disable=unused-argument """ Given the resultant correlations, compute the test score and return @@ -237,6 +298,70 @@ def score_and_test(corr_data): # pylint: disable=unused-argument return TestResult(inspect_only=True) + def get_jackknife_randoms(self, N_jack, catalog_data, ra='ra', dec='dec'): + """ + Computes the jackknife regions and random catalogs for each region + Parameters + ---------- + N_jack : number of regions + catalog_data : input catalog + + Returns + ------- + jack_labels: array of regions in catalog data + randoms: dict of randoms labeled by region + """ + catalogs = {} + #cluster + nn = np.stack((catalog_data[ra], catalog_data[dec]), axis=1) + _, jack_labels, _ = k_means(n_clusters=N_jack, random_state=0, X=nn) + + randoms = {} + for nj in range(N_jack): + catalog_data_jk = dict(zip(catalog_data.keys(), [v[(jack_labels != nj)] for v in catalog_data.values()])) + rand_cat, rr = self.generate_processed_randoms(catalog_data_jk) #get randoms for this footprint + #print('nj = {}, area = {:.2f}'.format(nj, self.check_footprint(catalog_data_jk))) #check footprint + randoms[str(nj)] = {'ran': rand_cat, 'rr':rr} + + return jack_labels, randoms + + def get_jackknife_errors(self, N_jack, catalog_data, sample_conditions, r, xi, jack_labels, randoms, 
+ diagonal_errors=True): + + #run treecorr for jackknife regions + Nrbins = len(r) + Njack_array = np.zeros((N_jack, Nrbins), dtype=np.float) + print(sample_conditions) + for nj in range(N_jack): + catalog_data_jk = dict(zip(catalog_data.keys(), [v[(jack_labels != nj)] for v in catalog_data.values()])) + tmp_catalog_data = self.create_test_sample(catalog_data_jk, sample_conditions) #apply sample cut + # run treecorr + xi_rad, Njack_array[nj], _ = self.run_treecorr(catalog_data=tmp_catalog_data, + treecorr_rand_cat=randoms[str(nj)]['ran'], + rr=randoms[str(nj)]['rr'], + output_file_name=None) + print(nj, Njack_array[nj]) + + covariance = np.zeros((Nrbins, Nrbins)) + for i in range(Nrbins): + if diagonal_errors: + for njack in Njack_array: + covariance[i][i] += (N_jack - 1.)/N_jack * (xi[i] - njack[i]) ** 2 + else: + for j in range(Nrbins): + for njack in Njack_array: + covariance[i][j] += (N_jack - 1.)/N_jack * (xi[i] - njack[i]) * (xi[j] - njack[j]) + print('bin/cov', i, covariance[i][i]) + + return covariance + + def check_footprint(self, catalog_data): + pix_footprint = get_healpixel_footprint(catalog_data['ra'], catalog_data['dec'], self.random_nside) + area_footprint = 4.*np.pi*(180./np.pi)**2*len(pix_footprint)/hp.nside2npix(self.random_nside) + + return area_footprint + + class CorrelationsAngularTwoPoint(CorrelationUtilities): """ Validation test for an angular 2pt correlation function. @@ -252,11 +377,16 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): catalog_data = self.load_catalog_data(catalog_instance=catalog_instance, requested_columns=self.requested_columns, test_samples=self.test_samples) - if not catalog_data: return TestResult(skipped=True, summary='Missing requested quantities') - rand_cat, rr = self.generate_processed_randoms(catalog_data) + if self.truncate_cat_name: + catalog_name = re.split('_', catalog_name)[0] + + rand_cat, rr = self.generate_processed_randoms(catalog_data) #assumes ra and dec exist + + if self.jackknife: #evaluate randoms for jackknife footprints + jack_labels, randoms = self.get_jackknife_randoms(self.N_jack, catalog_data) correlation_data = dict() for sample_name, sample_conditions in self.test_samples.items(): @@ -278,6 +408,14 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): rr=rr, output_file_name=output_treecorr_filepath) + print('xi_rad', len(xi_rad)) + #jackknife errors + if self.jackknife: + covariance = self.get_jackknife_errors(self.N_jack, catalog_data, sample_conditions, + xi_rad, xi, jack_labels, randoms, + diagonal_errors=self.use_diagonal_only) + xi_sig = np.sqrt(np.diag(covariance)) + correlation_data[sample_name] = (xi_rad, xi, xi_sig) self.plot_data_comparison(corr_data=correlation_data, @@ -348,7 +486,8 @@ def run_treecorr(self, catalog_data, treecorr_rand_cat, rr, output_file_name): dr.process(treecorr_rand_cat, cat) rd.process(cat, treecorr_rand_cat) - dd.write(output_file_name, rr, dr, rd) + if output_file_name is not None: + dd.write(output_file_name, rr, dr, rd) xi, var_xi = dd.calculateXi(rr, dr, rd) xi_rad = np.exp(dd.meanlogr) @@ -377,6 +516,10 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): if not catalog_data: return TestResult(skipped=True, summary='Missing requested quantities') + test = self.get_legend_title(self.test_samples) + + if self.truncate_cat_name: + catalog_name = re.split('_', catalog_name)[0] rand_ra, rand_dec = generate_uniform_random_ra_dec_footprint( catalog_data['ra'].size*self.random_mult, 
get_healpixel_footprint(catalog_data['ra'], catalog_data['dec'], self.random_nside), @@ -572,19 +715,19 @@ def plot_data_comparison(self, corr_data, catalog_name, output_dir): ax.loglog(sample_corr[0], p_law, c=color, - label=sample_label) + label=' '.join([self.survey_label, sample_label])) + ax.fill_between(sample_corr[0], p_law - p_law_err, p_law + p_law_err, lw=0, color=color, alpha=0.2) - ax.errorbar(sample_corr[0], sample_corr[1], sample_corr[2], marker='o', ls='', c=color) + ax.errorbar(sample_corr[0], sample_corr[1], sample_corr[2], marker='o', ls='', c=color, + label=' '.join([catalog_name, sample_label])) - ax.legend(loc='best') - ax.set_xlabel(self.fig_xlabel) - ax.set_ylim(*self.fig_ylim) - ax.set_ylabel(self.fig_ylabel) - ax.set_title('{} vs. {}'.format(catalog_name, self.data_label), fontsize='medium') + ax.fill_between([self.r_validation_min, self.r_validation_max], [0, 0], [10**4, 10**4], + alpha=0.15, color='grey') #validation region + self.decorate_plot(ax, catalog_name) fig.tight_layout() fig.savefig(os.path.join(output_dir, '{:s}.png'.format(self.test_name)), bbox_inches='tight') plt.close(fig) @@ -597,8 +740,8 @@ def score_and_test(self, corr_data): chi_per_nu = 0 total_sample = 0 rbins = list(corr_data.values()).pop()[0] - r_idx_min = np.searchsorted(rbins, 1.) - r_idx_max = np.searchsorted(rbins, 10., side='right') + r_idx_min = np.searchsorted(rbins, self.r_validation_min) + r_idx_max = np.searchsorted(rbins, self.r_validation_max, side='right') for sample_name in self.test_samples: sample_corr = corr_data[sample_name] sample_data = self.test_data[sample_name] diff --git a/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml b/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml new file mode 100644 index 00000000..4d73300d --- /dev/null +++ b/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml @@ -0,0 +1,79 @@ +subclass_name: CorrelationsTwoPoint.CorrelationsAngularTwoPoint + +# Catalog columns to attempt to load. The simplified names (e.g. ra, dec) are +# the names that we be used to cut on in test_samples. Make sure these match. +requested_columns: + ra: + - ra + - ra_true + dec: + - dec + - dec_true + mag: + - mag_r_sdss + - mag_r_des + - mag_r_lsst + - mag_true_r_sdss + - mag_true_r_des + - mag_true_r_lsst + +# Definition of samples and cuts to apply. The names of these columns must +# match the simple column name definitions above. +test_samples: + r_17_18: + mag: {min: 17, max: 18} + r_18_19: + mag: {min: 18, max: 19} + r_19_20: + mag: {min: 19, max: 20} + r_20_21: + mag: {min: 20, max: 21} +# r_17_21: +# mag: {min: 17, max: 21} + +# Output file naming format for output of the resultant correlation values. +output_filename_template: 'w_theta_r_{}.dat' + +# Name of file and columns to load and compare against the test samples. +data_filename: 'tpcf/Wang_2013_MNRAS_stt450_Table2.txt' +data_label: 'Y.Wang+2013' +# Specify the columns to load from the data for comparison. The names here +# should match the sample names from test_samples. +test_data: + r_17_18: {data_col: 3, data_err_col: 4} + r_18_19: {data_col: 5, data_err_col: 6} + r_19_20: {data_col: 7, data_err_col: 8} + r_20_21: {data_col: 9, data_err_col: 10} +# r_17_21: {data_col: 1, data_err_col: 2} + +#Jackknife Parameters +jackknife: true +N_jack: 20 +jackknife_quantities: + ra: + - ra + - ra_true + dec: + - dec + - dec_true +use_diagonal_only: true + +# Plotting configuration. 
+fig_xlabel: '$\theta\quad[{\rm deg}]}$' +fig_ylabel: '$w(\theta)$' +fig_ylim: [0.0001, 5.0] +fig_xlim: [0.008, 2] +test_sample_labels: + r_17_18: '$17 < r < 18$' + r_18_19: '$18 < r < 19$' + r_19_20: '$19 < r < 20$' + r_20_21: '$20 < r < 21$' + r_17_21: '$17 < r < 21$' + +#Treecorr parameters +min_sep: 0.01 +max_sep: 1.3 +bin_size: 0.5 + +description: | + Compare angular correlation functions of catalog and Wang et al (2013) SDSS r-band observations From f403ab9ca0d58acbcf3dea2b4e0644edca2d4244 Mon Sep 17 00:00:00 2001 From: Eve Kovacs Date: Thu, 12 Sep 2019 15:53:34 -0500 Subject: [PATCH 02/10] Fix travis issues --- descqa/CorrelationsTwoPoint.py | 171 +++++++++++++++++++-------------- 1 file changed, 97 insertions(+), 74 deletions(-) diff --git a/descqa/CorrelationsTwoPoint.py b/descqa/CorrelationsTwoPoint.py index d7c077be..d717a8e0 100644 --- a/descqa/CorrelationsTwoPoint.py +++ b/descqa/CorrelationsTwoPoint.py @@ -1,8 +1,8 @@ from __future__ import print_function, division, unicode_literals, absolute_import import os from collections import defaultdict -import numpy as np import re +import numpy as np import scipy.special as scsp import treecorr import healpy as hp @@ -12,8 +12,8 @@ from .base import BaseValidationTest, TestResult from .plotting import plt from .utils import (generate_uniform_random_ra_dec_footprint, - get_healpixel_footprint, - generate_uniform_random_dist) + get_healpixel_footprint, + generate_uniform_random_dist) __all__ = ['CorrelationsAngularTwoPoint', 'CorrelationsProjectedTwoPoint', 'DEEP2StellarMassTwoPoint'] @@ -217,14 +217,14 @@ def plot_data_comparison(self, corr_data, catalog_name, output_dir): sample_label = self.test_sample_labels.get(sample_name) ax.loglog(self.validation_data[:, 0], - self.validation_data[:, sample_data['data_col']], - c=color, - label=' '.join([self.survey_label, sample_label])) + self.validation_data[:, sample_data['data_col']], + c=color, + label=' '.join([self.survey_label, sample_label])) if 'data_err_col' in sample_data: y1 = (self.validation_data[:, sample_data['data_col']] + - self.validation_data[:, sample_data['data_err_col']]) + self.validation_data[:, sample_data['data_err_col']]) y2 = (self.validation_data[:, sample_data['data_col']] - - self.validation_data[:, sample_data['data_err_col']]) + self.validation_data[:, sample_data['data_err_col']]) if self.fig_xlim is not None: y2[y2 <= 0] = self.fig_ylim[0]*0.9 ax.fill_between(self.validation_data[:, 0], y1, y2, lw=0, color=color, alpha=0.25) @@ -240,21 +240,25 @@ def plot_data_comparison(self, corr_data, catalog_name, output_dir): def get_legend_title(self, test_samples, exclude='mstellar'): + """ + """ legend_title = '' - filter_ids = list(set([k for v in test_samples.values() for k in v.keys() if exclude not in k])) + filter_ids = list(set([k for v in test_samples.values() for k in v.keys() if exclude not in k])) for filter_id in filter_ids: legend_title = self.get_legend_subtitle(test_samples, filter_id=filter_id, legend_title=legend_title) - + return legend_title @staticmethod def get_legend_subtitle(test_samples, filter_id='z', legend_title=''): + """ + """ legend_title = legend_title if len(legend_title) == 0 else '{}; '.format(legend_title) min_values = [test_samples[k][filter_id].get('min', None) for k in test_samples if test_samples[k].get(filter_id, None) is not None] max_values = [test_samples[k][filter_id].get('max', None) for k in test_samples if test_samples[k].get(filter_id, None) is not None] min_title = '' - if len(min_values) > 0 and any([k != None for k 
in min_values]): + if len(min_values) > 0 and any([k is not None for k in min_values]): min_title = '{} < {}'.format(min([k for k in min_values if k is not None]), filter_id) max_title = '' if len(max_values) > 0 and any([k != None for k in max_values]): @@ -265,6 +269,9 @@ def get_legend_subtitle(test_samples, filter_id='z', legend_title=''): def decorate_plot(self, ax, catalog_name): + """ + Decorates plot with axes labels, title, etc. + """ title = '{} vs. {}'.format(catalog_name, self.data_label) lgnd_title = self.get_legend_title(self.test_samples) if self.title_in_legend else None ax.legend(loc='lower left', fontsize=self.legend_size, title=lgnd_title) @@ -311,7 +318,6 @@ def get_jackknife_randoms(self, N_jack, catalog_data, ra='ra', dec='dec'): jack_labels: array of regions in catalog data randoms: dict of randoms labeled by region """ - catalogs = {} #cluster nn = np.stack((catalog_data[ra], catalog_data[dec]), axis=1) _, jack_labels, _ = k_means(n_clusters=N_jack, random_state=0, X=nn) @@ -327,19 +333,34 @@ def get_jackknife_randoms(self, N_jack, catalog_data, ra='ra', dec='dec'): def get_jackknife_errors(self, N_jack, catalog_data, sample_conditions, r, xi, jack_labels, randoms, diagonal_errors=True): - + """ + Computes jacknife errors + Parameters + ---------- + N_jack : number of regions + catalog_data : input catalog + sample_conditions : sample selections + r : r data for full region + xi : correlation data for full region + jack_labels: array of regions in catalog data + randoms: dict of randoms labeled by region + Returns + -------- + covariance : covariance matrix + """ #run treecorr for jackknife regions Nrbins = len(r) Njack_array = np.zeros((N_jack, Nrbins), dtype=np.float) print(sample_conditions) for nj in range(N_jack): - catalog_data_jk = dict(zip(catalog_data.keys(), [v[(jack_labels != nj)] for v in catalog_data.values()])) + catalog_data_jk = dict(zip(catalog_data.keys(), + [v[(jack_labels != nj)] for v in catalog_data.values()])) tmp_catalog_data = self.create_test_sample(catalog_data_jk, sample_conditions) #apply sample cut # run treecorr xi_rad, Njack_array[nj], _ = self.run_treecorr(catalog_data=tmp_catalog_data, - treecorr_rand_cat=randoms[str(nj)]['ran'], - rr=randoms[str(nj)]['rr'], - output_file_name=None) + treecorr_rand_cat=randoms[str(nj)]['ran'], + rr=randoms[str(nj)]['rr'], + output_file_name=None) print(nj, Njack_array[nj]) covariance = np.zeros((Nrbins, Nrbins)) @@ -356,7 +377,10 @@ def get_jackknife_errors(self, N_jack, catalog_data, sample_conditions, r, xi, j return covariance def check_footprint(self, catalog_data): - pix_footprint = get_healpixel_footprint(catalog_data['ra'], catalog_data['dec'], self.random_nside) + """ + """ + pix_footprint = get_healpixel_footprint(catalog_data['ra'], + catalog_data['dec'], self.random_nside) area_footprint = 4.*np.pi*(180./np.pi)**2*len(pix_footprint)/hp.nside2npix(self.random_nside) return area_footprint @@ -372,59 +396,6 @@ def __init__(self, **kwargs): self.treecorr_config['sep_units'] = 'deg' - def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): - - catalog_data = self.load_catalog_data(catalog_instance=catalog_instance, - requested_columns=self.requested_columns, - test_samples=self.test_samples) - if not catalog_data: - return TestResult(skipped=True, summary='Missing requested quantities') - - if self.truncate_cat_name: - catalog_name = re.split('_', catalog_name)[0] - - rand_cat, rr = self.generate_processed_randoms(catalog_data) #assumes ra and dec exist - - if 
self.jackknife: #evaluate randoms for jackknife footprints - jack_labels, randoms = self.get_jackknife_randoms(self.N_jack, catalog_data) - - correlation_data = dict() - for sample_name, sample_conditions in self.test_samples.items(): - tmp_catalog_data = self.create_test_sample( - catalog_data, sample_conditions) - - with open(os.path.join(output_dir, 'galaxy_count.dat'), 'a') as f: - f.write('{} {}\n'.format(sample_name, len(tmp_catalog_data['ra']))) - - if not len(tmp_catalog_data['ra']): - continue - - output_treecorr_filepath = os.path.join( - output_dir, self.output_filename_template.format(sample_name)) - - xi_rad, xi, xi_sig = self.run_treecorr( - catalog_data=tmp_catalog_data, - treecorr_rand_cat=rand_cat, - rr=rr, - output_file_name=output_treecorr_filepath) - - print('xi_rad', len(xi_rad)) - #jackknife errors - if self.jackknife: - covariance = self.get_jackknife_errors(self.N_jack, catalog_data, sample_conditions, - xi_rad, xi, jack_labels, randoms, - diagonal_errors=self.use_diagonal_only) - xi_sig = np.sqrt(np.diag(covariance)) - - correlation_data[sample_name] = (xi_rad, xi, xi_sig) - - self.plot_data_comparison(corr_data=correlation_data, - catalog_name=catalog_name, - output_dir=output_dir) - - return self.score_and_test(correlation_data) - - def generate_processed_randoms(self, catalog_data): """ Create and process random data for the 2pt correlation function. @@ -496,6 +467,60 @@ def run_treecorr(self, catalog_data, treecorr_rand_cat, rr, output_file_name): return xi_rad, xi, xi_sig + def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): + + catalog_data = self.load_catalog_data(catalog_instance=catalog_instance, + requested_columns=self.requested_columns, + test_samples=self.test_samples) + if not catalog_data: + return TestResult(skipped=True, summary='Missing requested quantities') + + if self.truncate_cat_name: + catalog_name = re.split('_', catalog_name)[0] + + rand_cat, rr = self.generate_processed_randoms(catalog_data) #assumes ra and dec exist + + if self.jackknife: #evaluate randoms for jackknife footprints + jack_labels, randoms = self.get_jackknife_randoms(self.N_jack, catalog_data) + + correlation_data = dict() + for sample_name, sample_conditions in self.test_samples.items(): + tmp_catalog_data = self.create_test_sample( + catalog_data, sample_conditions) + + with open(os.path.join(output_dir, 'galaxy_count.dat'), 'a') as f: + f.write('{} {}\n'.format(sample_name, len(tmp_catalog_data['ra']))) + + if not len(tmp_catalog_data['ra']): + continue + + output_treecorr_filepath = os.path.join( + output_dir, self.output_filename_template.format(sample_name)) + + xi_rad, xi, xi_sig = self.run_treecorr( + catalog_data=tmp_catalog_data, + treecorr_rand_cat=rand_cat, + rr=rr, + output_file_name=output_treecorr_filepath) + + print('xi_rad', len(xi_rad)) + #jackknife errors + if self.jackknife: + covariance = self.get_jackknife_errors(self.N_jack, catalog_data, sample_conditions, + xi_rad, xi, jack_labels, randoms, + diagonal_errors=self.use_diagonal_only) + xi_sig = np.sqrt(np.diag(covariance)) + + correlation_data[sample_name] = (xi_rad, xi, xi_sig) + + self.plot_data_comparison(corr_data=correlation_data, + catalog_name=catalog_name, + output_dir=output_dir) + + return self.score_and_test(correlation_data) + + + class CorrelationsProjectedTwoPoint(CorrelationUtilities): """ Validation test for an radial 2pt correlation function. 
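
For reference, the run_treecorr helper that these hunks reorganize is a thin wrapper around treecorr NN pair counts plus the Landy-Szalay estimator. A minimal standalone sketch of that workflow is below; the bin settings and the gal_*/ran_* arguments are placeholders, not values taken from this patch:

    import numpy as np
    import treecorr

    def angular_w_theta(gal_ra, gal_dec, ran_ra, ran_dec,
                        min_sep=0.01, max_sep=1.3, bin_size=0.5):
        """Landy-Szalay w(theta) from galaxy and random ra/dec arrays (degrees)."""
        config = {'min_sep': min_sep, 'max_sep': max_sep,
                  'bin_size': bin_size, 'sep_units': 'deg'}
        cat = treecorr.Catalog(ra=gal_ra, dec=gal_dec, ra_units='deg', dec_units='deg')
        ran = treecorr.Catalog(ra=ran_ra, dec=ran_dec, ra_units='deg', dec_units='deg')

        dd = treecorr.NNCorrelation(**config)
        rr = treecorr.NNCorrelation(**config)
        dr = treecorr.NNCorrelation(**config)
        rd = treecorr.NNCorrelation(**config)
        dd.process(cat)        # data-data pair counts
        rr.process(ran)        # random-random
        dr.process(ran, cat)   # data-random
        rd.process(cat, ran)   # random-data

        xi, var_xi = dd.calculateXi(rr, dr, rd)  # Landy-Szalay estimator
        theta = np.exp(dd.meanlogr)              # mean separation per bin (deg)
        return theta, xi, np.sqrt(var_xi)
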
@@ -516,8 +541,6 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): if not catalog_data: return TestResult(skipped=True, summary='Missing requested quantities') - test = self.get_legend_title(self.test_samples) - if self.truncate_cat_name: catalog_name = re.split('_', catalog_name)[0] rand_ra, rand_dec = generate_uniform_random_ra_dec_footprint( @@ -716,7 +739,7 @@ def plot_data_comparison(self, corr_data, catalog_name, output_dir): p_law, c=color, label=' '.join([self.survey_label, sample_label])) - + ax.fill_between(sample_corr[0], p_law - p_law_err, p_law + p_law_err, From 2e1dcc127cf7e65cf27d529f56d76c61c127a614 Mon Sep 17 00:00:00 2001 From: Eve Kovacs Date: Thu, 12 Sep 2019 18:03:14 -0500 Subject: [PATCH 03/10] Fix travis issues --- descqa/CorrelationsTwoPoint.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/descqa/CorrelationsTwoPoint.py b/descqa/CorrelationsTwoPoint.py index d717a8e0..e8ee0ffc 100644 --- a/descqa/CorrelationsTwoPoint.py +++ b/descqa/CorrelationsTwoPoint.py @@ -261,7 +261,7 @@ def get_legend_subtitle(test_samples, filter_id='z', legend_title=''): if len(min_values) > 0 and any([k is not None for k in min_values]): min_title = '{} < {}'.format(min([k for k in min_values if k is not None]), filter_id) max_title = '' - if len(max_values) > 0 and any([k != None for k in max_values]): + if len(max_values) > 0 and any([k is not None for k in max_values]): max_values = [k for k in max_values if k is not None] max_title = '${} < {}$'.format(filter_id, max(max_values)) if len(min_title) == 0 else '${} < {}$'.format(min_title, max(max_values)) @@ -305,13 +305,15 @@ def score_and_test(corr_data): # pylint: disable=unused-argument return TestResult(inspect_only=True) - def get_jackknife_randoms(self, N_jack, catalog_data, ra='ra', dec='dec'): + @staticmethod + def get_jackknife_randoms(N_jack, catalog_data, generate_randoms, ra='ra', dec='dec'): """ Computes the jackknife regions and random catalogs for each region Parameters ---------- N_jack : number of regions catalog_data : input catalog + generate_randoms: function to generate randoms (eg self.generate_processed_randoms) Returns ------- @@ -325,14 +327,15 @@ def get_jackknife_randoms(self, N_jack, catalog_data, ra='ra', dec='dec'): randoms = {} for nj in range(N_jack): catalog_data_jk = dict(zip(catalog_data.keys(), [v[(jack_labels != nj)] for v in catalog_data.values()])) - rand_cat, rr = self.generate_processed_randoms(catalog_data_jk) #get randoms for this footprint + rand_cat, rr = generate_randoms(catalog_data_jk) #get randoms for this footprint #print('nj = {}, area = {:.2f}'.format(nj, self.check_footprint(catalog_data_jk))) #check footprint randoms[str(nj)] = {'ran': rand_cat, 'rr':rr} return jack_labels, randoms - def get_jackknife_errors(self, N_jack, catalog_data, sample_conditions, r, xi, jack_labels, randoms, - diagonal_errors=True): + @staticmethod + def get_jackknife_errors(N_jack, catalog_data, sample_conditions, r, xi, jack_labels, randoms, + run_treecorr, diagonal_errors=True): """ Computes jacknife errors Parameters @@ -344,6 +347,8 @@ def get_jackknife_errors(self, N_jack, catalog_data, sample_conditions, r, xi, j xi : correlation data for full region jack_labels: array of regions in catalog data randoms: dict of randoms labeled by region + run_treecorr: method to run treecorr + Returns -------- covariance : covariance matrix @@ -357,9 +362,9 @@ def get_jackknife_errors(self, N_jack, catalog_data, sample_conditions, r, xi, j 
[v[(jack_labels != nj)] for v in catalog_data.values()])) tmp_catalog_data = self.create_test_sample(catalog_data_jk, sample_conditions) #apply sample cut # run treecorr - xi_rad, Njack_array[nj], _ = self.run_treecorr(catalog_data=tmp_catalog_data, + _, Njack_array[nj], _ = run_treecorr(catalog_data=tmp_catalog_data, treecorr_rand_cat=randoms[str(nj)]['ran'], - rr=randoms[str(nj)]['rr'], + rr=randoms[str(nj)]['rr'], output_file_name=None) print(nj, Njack_array[nj]) @@ -481,7 +486,8 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): rand_cat, rr = self.generate_processed_randoms(catalog_data) #assumes ra and dec exist if self.jackknife: #evaluate randoms for jackknife footprints - jack_labels, randoms = self.get_jackknife_randoms(self.N_jack, catalog_data) + jack_labels, randoms = self.get_jackknife_randoms(self.N_jack, catalog_data, + self.generate_processed_randoms) correlation_data = dict() for sample_name, sample_conditions in self.test_samples.items(): @@ -503,11 +509,11 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): rr=rr, output_file_name=output_treecorr_filepath) - print('xi_rad', len(xi_rad)) #jackknife errors if self.jackknife: covariance = self.get_jackknife_errors(self.N_jack, catalog_data, sample_conditions, xi_rad, xi, jack_labels, randoms, + self.run_treecorr, diagonal_errors=self.use_diagonal_only) xi_sig = np.sqrt(np.diag(covariance)) @@ -543,6 +549,7 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): if self.truncate_cat_name: catalog_name = re.split('_', catalog_name)[0] + rand_ra, rand_dec = generate_uniform_random_ra_dec_footprint( catalog_data['ra'].size*self.random_mult, get_healpixel_footprint(catalog_data['ra'], catalog_data['dec'], self.random_nside), From 0cd7ef46476c5d538fa93edbdf96a87b690d4ebd Mon Sep 17 00:00:00 2001 From: evevkovacs Date: Mon, 16 Sep 2019 11:01:40 -0700 Subject: [PATCH 04/10] Removed some extra print statements --- descqa/CorrelationsTwoPoint.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/descqa/CorrelationsTwoPoint.py b/descqa/CorrelationsTwoPoint.py index e8ee0ffc..ade58bc5 100644 --- a/descqa/CorrelationsTwoPoint.py +++ b/descqa/CorrelationsTwoPoint.py @@ -333,8 +333,7 @@ def get_jackknife_randoms(N_jack, catalog_data, generate_randoms, ra='ra', dec=' return jack_labels, randoms - @staticmethod - def get_jackknife_errors(N_jack, catalog_data, sample_conditions, r, xi, jack_labels, randoms, + def get_jackknife_errors(self, N_jack, catalog_data, sample_conditions, r, xi, jack_labels, randoms, run_treecorr, diagonal_errors=True): """ Computes jacknife errors @@ -366,7 +365,6 @@ def get_jackknife_errors(N_jack, catalog_data, sample_conditions, r, xi, jack_la treecorr_rand_cat=randoms[str(nj)]['ran'], rr=randoms[str(nj)]['rr'], output_file_name=None) - print(nj, Njack_array[nj]) covariance = np.zeros((Nrbins, Nrbins)) for i in range(Nrbins): @@ -377,7 +375,6 @@ def get_jackknife_errors(N_jack, catalog_data, sample_conditions, r, xi, jack_la for j in range(Nrbins): for njack in Njack_array: covariance[i][j] += (N_jack - 1.)/N_jack * (xi[i] - njack[i]) * (xi[j] - njack[j]) - print('bin/cov', i, covariance[i][i]) return covariance From 71ea8e46ab9c1b530aa949a0b9e454e3cfaf97ad Mon Sep 17 00:00:00 2001 From: evevkovacs Date: Thu, 3 Oct 2019 09:03:01 -0700 Subject: [PATCH 05/10] Changes for adding object catalog to test --- descqa/CorrelationsTwoPoint.py | 11 +++++++++-- descqa/configs/tpcf_Wang2013_rSDSS.yaml | 1 + 
descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/descqa/CorrelationsTwoPoint.py b/descqa/CorrelationsTwoPoint.py index ade58bc5..92aa2f1f 100644 --- a/descqa/CorrelationsTwoPoint.py +++ b/descqa/CorrelationsTwoPoint.py @@ -139,6 +139,10 @@ def load_catalog_data(catalog_instance, requested_columns, test_samples): col_value_maxs[col_key].append(condition['max']) filters = [(np.isfinite, c) for c in colnames.values()] + + if catalog_instance.has_quantity('extendedness'): + filters.append('extendedness == 1') + for col_key, col_name in colnames.items(): if col_key in col_value_mins and col_value_mins[col_key]: filters.append('{} >= {}'.format(col_name, min(col_value_mins[col_key]))) @@ -481,7 +485,9 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): catalog_name = re.split('_', catalog_name)[0] rand_cat, rr = self.generate_processed_randoms(catalog_data) #assumes ra and dec exist - + with open(os.path.join(output_dir, 'galaxy_count.dat'), 'a') as f: + f.write('Random (= catalog) Area = {:.1f} sq. deg.\n'.format(self.check_footprint(catalog_data))) + if self.jackknife: #evaluate randoms for jackknife footprints jack_labels, randoms = self.get_jackknife_randoms(self.N_jack, catalog_data, self.generate_processed_randoms) @@ -492,7 +498,8 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): catalog_data, sample_conditions) with open(os.path.join(output_dir, 'galaxy_count.dat'), 'a') as f: - f.write('{} {}\n'.format(sample_name, len(tmp_catalog_data['ra']))) + f.write('{} {} {:.1f} sq. deg.\n'.format(sample_name, len(tmp_catalog_data['ra']), + self.check_footprint(tmp_catalog_data))) #print area if not len(tmp_catalog_data['ra']): continue diff --git a/descqa/configs/tpcf_Wang2013_rSDSS.yaml b/descqa/configs/tpcf_Wang2013_rSDSS.yaml index 6cbdbfe7..edefdad4 100644 --- a/descqa/configs/tpcf_Wang2013_rSDSS.yaml +++ b/descqa/configs/tpcf_Wang2013_rSDSS.yaml @@ -16,6 +16,7 @@ requested_columns: - mag_true_r_sdss - mag_true_r_des - mag_true_r_lsst + - mag_r_cModel # Definition of samples and cuts to apply. The names of these columns must # match the simple column name definitions above. diff --git a/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml b/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml index 4d73300d..95a5933b 100644 --- a/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml +++ b/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml @@ -16,6 +16,7 @@ requested_columns: - mag_true_r_sdss - mag_true_r_des - mag_true_r_lsst + - mag_r_CModel # Definition of samples and cuts to apply. The names of these columns must # match the simple column name definitions above. 
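
The jackknife errors added in PATCH 01 and cleaned up in the follow-on commits use the standard delete-one-region recipe: partition the footprint into N_jack regions with k-means on (ra, dec), re-measure the correlation with each region removed, and accumulate an (N_jack - 1)/N_jack weighted covariance. A compact sketch of that logic, with measure_xi(mask) as a hypothetical stand-in for the full treecorr-plus-randoms pipeline:

    import numpy as np
    from sklearn.cluster import k_means

    def jackknife_covariance(ra, dec, measure_xi, n_jack=20, diagonal_only=True):
        """Delete-one-region jackknife covariance of a correlation function.

        measure_xi(mask) must return xi measured on the galaxies selected by
        the boolean mask (with randoms regenerated on the reduced footprint).
        """
        # Partition the footprint into n_jack contiguous regions in (ra, dec).
        _, labels, _ = k_means(np.stack((ra, dec), axis=1),
                               n_clusters=n_jack, random_state=0)

        xi_full = measure_xi(np.ones(len(ra), dtype=bool))
        xi_jack = np.array([measure_xi(labels != region) for region in range(n_jack)])

        # Sum over jackknife realizations; the (N-1)/N prefactor accounts for
        # the overlap between the delete-one samples.
        delta = xi_jack - xi_full
        cov = (n_jack - 1.0) / n_jack * delta.T.dot(delta)
        return np.diag(np.diag(cov)) if diagonal_only else cov
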
From 3ef91c8085baae0c23bf4adcd9bc041367a0dd26 Mon Sep 17 00:00:00 2001 From: evevkovacs Date: Thu, 3 Oct 2019 11:42:39 -0700 Subject: [PATCH 06/10] Change to using cModel mag in test for object catalog --- descqa/apparent_mag_func_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/descqa/apparent_mag_func_test.py b/descqa/apparent_mag_func_test.py index 101d1fb8..4cc802a8 100644 --- a/descqa/apparent_mag_func_test.py +++ b/descqa/apparent_mag_func_test.py @@ -61,7 +61,7 @@ def __init__(self, band='r', band_lim=(24.0, 27.5), fractional_tol=0.4, observat 'mag_true_{}_des', 'mag_{}_hsc', 'mag_true_{}_hsc', - 'mag_{}') + 'mag_{}_cModel') self.possible_mag_fields = [f.format(band) for f in possible_mag_fields] # attach some attributes to the test From 7f1b545730f1b1916432472915b9f5fcdc1825ca Mon Sep 17 00:00:00 2001 From: evevkovacs Date: Mon, 7 Oct 2019 12:18:16 -0700 Subject: [PATCH 07/10] improve error message --- descqa/CorrelationsTwoPoint.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/descqa/CorrelationsTwoPoint.py b/descqa/CorrelationsTwoPoint.py index 92aa2f1f..e64c4971 100644 --- a/descqa/CorrelationsTwoPoint.py +++ b/descqa/CorrelationsTwoPoint.py @@ -479,7 +479,9 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): requested_columns=self.requested_columns, test_samples=self.test_samples) if not catalog_data: - return TestResult(skipped=True, summary='Missing requested quantities') + cols = [i for c in self.requested_columns.values() for i in c] + return TestResult(skipped=True, + summary='Missing requested quantities {}'.format(', '.join(cols))) if self.truncate_cat_name: catalog_name = re.split('_', catalog_name)[0] From 1a6f4271a64c0a2d2ac5ddb64d04b14e4beff218 Mon Sep 17 00:00:00 2001 From: evevkovacs Date: Wed, 9 Oct 2019 07:59:49 -0700 Subject: [PATCH 08/10] Fix typo in cModel --- descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml b/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml index 95a5933b..c4742fd4 100644 --- a/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml +++ b/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml @@ -16,7 +16,7 @@ requested_columns: - mag_true_r_sdss - mag_true_r_des - mag_true_r_lsst - - mag_r_CModel + - mag_r_cModel # Definition of samples and cuts to apply. The names of these columns must # match the simple column name definitions above. From 707855ea2164283274675d87f57059e352994025 Mon Sep 17 00:00:00 2001 From: evevkovacs Date: Wed, 9 Oct 2019 15:54:28 -0700 Subject: [PATCH 09/10] Minor update to order of possible_mag_fields so object add-on runs correctly --- descqa/apparent_mag_func_test.py | 5 +++-- descqa/configs/tpcf_Wang2013_rSDSS.yaml | 2 +- descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/descqa/apparent_mag_func_test.py b/descqa/apparent_mag_func_test.py index 4cc802a8..195ae36e 100644 --- a/descqa/apparent_mag_func_test.py +++ b/descqa/apparent_mag_func_test.py @@ -53,7 +53,8 @@ def __init__(self, band='r', band_lim=(24.0, 27.5), fractional_tol=0.4, observat self.min_mag = kwargs.get('min_mag', 19.) 
# catalog quantities needed - possible_mag_fields = ('mag_{}_lsst', + possible_mag_fields = ('mag_{}_cModel', + 'mag_{}_lsst', 'mag_true_{}_lsst', 'mag_{}_sdss', 'mag_true_{}_sdss', @@ -61,7 +62,7 @@ def __init__(self, band='r', band_lim=(24.0, 27.5), fractional_tol=0.4, observat 'mag_true_{}_des', 'mag_{}_hsc', 'mag_true_{}_hsc', - 'mag_{}_cModel') + ) self.possible_mag_fields = [f.format(band) for f in possible_mag_fields] # attach some attributes to the test diff --git a/descqa/configs/tpcf_Wang2013_rSDSS.yaml b/descqa/configs/tpcf_Wang2013_rSDSS.yaml index edefdad4..a8a44f99 100644 --- a/descqa/configs/tpcf_Wang2013_rSDSS.yaml +++ b/descqa/configs/tpcf_Wang2013_rSDSS.yaml @@ -12,11 +12,11 @@ requested_columns: mag: - mag_r_sdss - mag_r_des + - mag_r_cModel - mag_r_lsst - mag_true_r_sdss - mag_true_r_des - mag_true_r_lsst - - mag_r_cModel # Definition of samples and cuts to apply. The names of these columns must # match the simple column name definitions above. diff --git a/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml b/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml index c4742fd4..0c57b103 100644 --- a/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml +++ b/descqa/configs/tpcf_Wang2013_rSDSS_jack.yaml @@ -12,11 +12,11 @@ requested_columns: mag: - mag_r_sdss - mag_r_des + - mag_r_cModel - mag_r_lsst - mag_true_r_sdss - mag_true_r_des - mag_true_r_lsst - - mag_r_cModel # Definition of samples and cuts to apply. The names of these columns must # match the simple column name definitions above. From 340e556f52bdfefdf9449eeed92f4474374d4981 Mon Sep 17 00:00:00 2001 From: evevkovacs Date: Thu, 17 Oct 2019 15:33:37 -0700 Subject: [PATCH 10/10] addressed review comments by prlarsen --- descqa/CorrelationsTwoPoint.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/descqa/CorrelationsTwoPoint.py b/descqa/CorrelationsTwoPoint.py index e64c4971..ac8b7778 100644 --- a/descqa/CorrelationsTwoPoint.py +++ b/descqa/CorrelationsTwoPoint.py @@ -332,7 +332,6 @@ def get_jackknife_randoms(N_jack, catalog_data, generate_randoms, ra='ra', dec=' for nj in range(N_jack): catalog_data_jk = dict(zip(catalog_data.keys(), [v[(jack_labels != nj)] for v in catalog_data.values()])) rand_cat, rr = generate_randoms(catalog_data_jk) #get randoms for this footprint - #print('nj = {}, area = {:.2f}'.format(nj, self.check_footprint(catalog_data_jk))) #check footprint randoms[str(nj)] = {'ran': rand_cat, 'rr':rr} return jack_labels, randoms @@ -488,8 +487,10 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): rand_cat, rr = self.generate_processed_randoms(catalog_data) #assumes ra and dec exist with open(os.path.join(output_dir, 'galaxy_count.dat'), 'a') as f: - f.write('Random (= catalog) Area = {:.1f} sq. deg.\n'.format(self.check_footprint(catalog_data))) - + f.write('Total (= catalog) Area = {:.1f} sq. deg.\n'.format(self.check_footprint(catalog_data))) + f.write('NOTE: 1) assuming catalog is of equal depth over the full area\n') + f.write(' 2) assuming sample contains enough galaxies to measure area\n') + if self.jackknife: #evaluate randoms for jackknife footprints jack_labels, randoms = self.get_jackknife_randoms(self.N_jack, catalog_data, self.generate_processed_randoms) @@ -499,10 +500,6 @@ def run_on_single_catalog(self, catalog_instance, catalog_name, output_dir): tmp_catalog_data = self.create_test_sample( catalog_data, sample_conditions) - with open(os.path.join(output_dir, 'galaxy_count.dat'), 'a') as f: - f.write('{} {} {:.1f} sq. 
deg.\n'.format(sample_name, len(tmp_catalog_data['ra']), - self.check_footprint(tmp_catalog_data))) #print area - if not len(tmp_catalog_data['ra']): continue
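
The footprint area that PATCH 10 writes to galaxy_count.dat is the count of occupied healpix pixels scaled to the full sky, as in check_footprint. A minimal sketch of that estimate follows; calling hp.ang2pix directly is an assumption about what the get_healpixel_footprint utility does, and nside, ra, dec are placeholders:

    import numpy as np
    import healpy as hp

    def footprint_area(ra, dec, nside=1024):
        """Approximate survey area in sq. deg. from occupied healpix pixels."""
        pixels = np.unique(hp.ang2pix(nside, ra, dec, lonlat=True))
        full_sky = 4.0 * np.pi * (180.0 / np.pi) ** 2   # ~41253 sq. deg.
        return full_sky * len(pixels) / hp.nside2npix(nside)

At nside=1024 each pixel covers roughly 0.0033 sq. deg. (about 12 sq. arcmin), so the quoted areas are only as accurate as the assumptions noted in the patch: uniform depth and enough galaxies to populate the footprint's pixels.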