From c54ebf8da62401912fd3c9dbc795c74e4813d7dd Mon Sep 17 00:00:00 2001 From: Erik Rosenberg Date: Fri, 12 Apr 2024 16:29:14 +0100 Subject: [PATCH] Add 'auto' for healpix nside_tile Comment out print statements in projection --- demos/demo_proj_hp.py | 20 +++++++++++++------- python/proj/wcs.py | 18 ++++++++++++++++-- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/demos/demo_proj_hp.py b/demos/demo_proj_hp.py index b893208..8f17de1 100644 --- a/demos/demo_proj_hp.py +++ b/demos/demo_proj_hp.py @@ -1,6 +1,6 @@ import numpy as np import so3g -import healpy as hp +import healpy as hp ## Used for convenience in plotting from matplotlib import pyplot as plt def main(): @@ -20,8 +20,10 @@ def main(): isignal = 0 # What coord to use as signal. 0=ra, 1=dec ## Mapmaking params ## - nside = 512 - nside_tile = 16 + nside = 512 # Nside of the map + nside_tile = 16 # Nside of tiles to use. None for untiled or 'auto' to compute and set a reasonable number. + # 'auto' sets n_tile = nActiveTilesPerThread * nThreads / fsky with nActiveTilesPerThread=5, and rounds up to nearest power of 2 + # n_tile = 2 ** np.ceil(0.5 * np.log2(nActiveTilesPerThread * nThreads / (12 * fsky))) det_weights = None ## Simulate scanning and make TOD ## @@ -35,17 +37,21 @@ def main(): ######## Do simple 1-component mapmaking ######## print("Making untiled map") ## Make an untiled map ## + # Multi-threading possible but current naive thread assignment expected to perform very poorly on small sky area + # Use tiled map if you care about multi-thread performance / efficiency proj = so3g.proj.ProjectionistHealpix.for_healpix(nside, nside_tile=None) proj.assign_threads(asm, 'simple') ## Assign threads rhs = proj.to_map(signal, asm, det_weights=det_weights, comps='T') weights = proj.to_weights(asm, det_weights=det_weights, comps='T') # Now simple combination iweights = np.zeros_like(weights) - idx = (weights != 0) + idx = (weights != 0) iweights[idx] = 1/(weights[idx]) imap = (rhs * iweights[0])[0] ## Make a tiled map ## + # Tiles used for thread assignment and to efficiently store partial-sky information. Much better performance / thread scaling than untiled + # You want a few tiles per thread to allow load balancing so set this for nActiveTiles/nthreads = 12*nside_tile**2 * fsky / nthreads ~5-10 print("Making tiled map") proj = so3g.proj.ProjectionistHealpix.for_healpix(nside, nside_tile=nside_tile) proj.assign_threads(asm, 'tiles') ## Assign threads @@ -54,7 +60,7 @@ def main(): rhs = untile_healpix(proj.to_map(signal, asm, det_weights=det_weights, comps='T')) weights = untile_healpix(proj.to_weights(asm, det_weights=det_weights, comps='T')) iweights = np.zeros_like(weights) - idx = (weights != 0) + idx = (weights != 0) iweights[idx] = 1/(weights[idx]) imap_tiled = (rhs * iweights[0])[0] @@ -63,7 +69,7 @@ def main(): ## Plot ## imap[imap==0] = hp.UNSEEN - hp.mollview(imap, nest=True) + hp.mollview(imap, nest=True) plt.show() ## Plot gnomview ## @@ -75,7 +81,7 @@ def main(): width_pix = int(np.rad2deg(width) * 60. / reso) * 2 hp.gnomview(imap, nest=True, rot=center, xsize=width_pix*1.4, reso=reso) plt.show() - + ######## Helper functions for tiled healpix maps ######## diff --git a/python/proj/wcs.py b/python/proj/wcs.py index b8375a9..05fd55e 100644 --- a/python/proj/wcs.py +++ b/python/proj/wcs.py @@ -386,8 +386,9 @@ def get_active_tiles(self, assembly, assign=False): group_tiles[idx].append(_t) imax = np.argmax(group_n) max_ratio = group_n[imax] / np.mean(np.concatenate([group_n[:imax], group_n[imax+1:]])) - if len(group_n)>1 and max_ratio > 1.1: - print(f"Warning: Threads poorly balanced. Max/mean hits = {max_ratio}") + # if len(group_n)>1 and max_ratio > 1.1: + # print(f"Warning: Threads poorly balanced. Max/mean hits = {max_ratio}") + # Now paint them into Ranges. R = projeng.tile_ranges(q_native, assembly.dets, group_tiles) R = wrap_ivals(R) @@ -704,6 +705,19 @@ def for_healpix(cls, nside, interpol=None, ordering='NEST', nside_tile=None, act return self + def get_active_tiles(self, assembly, assign=False): + if self.nside_tile == 'auto': + nside_tile0 = 4 # ntile = 192, for estimating fsky + nActivePerThread = 5 # How many active pixels do you want per thread + self.nside_tile = nside_tile0 + nActive = len(self.get_active_tiles(assembly)['active_tiles']) + fsky = nActive / (12 * nside_tile0**2) + nThreads = so3g.useful_info()['omp_num_threads'] + # nside_tile is smallest power of 2 satisfying nTile >= nActivePerThread * nthread / fsky + self.nside_tile = int(2**np.ceil(0.5 * np.log2(nActivePerThread * nThreads / (12 * fsky)))) + #print('Setting nside_tile=', self.nside_tile) + return super().get_active_tiles(assembly, assign) + def get_coords(self, assembly, use_native=False, output=None): projeng = self.get_ProjEng('TQU') if use_native: