From c54ebf8da62401912fd3c9dbc795c74e4813d7dd Mon Sep 17 00:00:00 2001
From: Erik Rosenberg <errosenberg47@gmail.com>
Date: Fri, 12 Apr 2024 16:29:14 +0100
Subject: [PATCH] Add 'auto' for healpix nside_tile Comment out print
 statements in projection

---
 demos/demo_proj_hp.py | 20 +++++++++++++-------
 python/proj/wcs.py    | 18 ++++++++++++++++--
 2 files changed, 29 insertions(+), 9 deletions(-)

diff --git a/demos/demo_proj_hp.py b/demos/demo_proj_hp.py
index b893208..8f17de1 100644
--- a/demos/demo_proj_hp.py
+++ b/demos/demo_proj_hp.py
@@ -1,6 +1,6 @@
 import numpy as np
 import so3g
-import healpy as hp
+import healpy as hp ## Used for convenience in plotting
 from matplotlib import pyplot as plt
 
 def main():
@@ -20,8 +20,10 @@ def main():
     isignal = 0 # What coord to use as signal. 0=ra, 1=dec
 
     ## Mapmaking params ##
-    nside = 512
-    nside_tile = 16
+    nside = 512  # Nside of the map
+    nside_tile = 16  # Nside of tiles to use. None for untiled or 'auto' to compute and set a reasonable number.
+    # 'auto' sets n_tile = nActiveTilesPerThread * nThreads / fsky with nActiveTilesPerThread=5, and rounds up to nearest power of 2
+    # n_tile = 2 ** np.ceil(0.5 * np.log2(nActiveTilesPerThread * nThreads / (12 * fsky)))
     det_weights = None
 
     ## Simulate scanning and make TOD ##
@@ -35,17 +37,21 @@ def main():
     ######## Do simple 1-component mapmaking  ########
     print("Making untiled map")
     ## Make an untiled map ##
+    # Multi-threading possible but current naive thread assignment expected to perform very poorly on small sky area
+    # Use tiled map if you care about multi-thread performance / efficiency
     proj = so3g.proj.ProjectionistHealpix.for_healpix(nside, nside_tile=None)
     proj.assign_threads(asm, 'simple') ## Assign threads
     rhs = proj.to_map(signal, asm, det_weights=det_weights, comps='T')
     weights = proj.to_weights(asm, det_weights=det_weights, comps='T')
     # Now simple combination
     iweights = np.zeros_like(weights)
-    idx = (weights != 0)    
+    idx = (weights != 0)
     iweights[idx] = 1/(weights[idx])
     imap = (rhs * iweights[0])[0]
 
     ## Make a tiled map ##
+    # Tiles used for thread assignment and to efficiently store partial-sky information. Much better performance / thread scaling than untiled
+    # You want a few tiles per thread to allow load balancing so set this for nActiveTiles/nthreads = 12*nside_tile**2 * fsky / nthreads ~5-10
     print("Making tiled map")
     proj = so3g.proj.ProjectionistHealpix.for_healpix(nside, nside_tile=nside_tile)
     proj.assign_threads(asm, 'tiles') ## Assign threads
@@ -54,7 +60,7 @@ def main():
     rhs = untile_healpix(proj.to_map(signal, asm, det_weights=det_weights, comps='T'))
     weights = untile_healpix(proj.to_weights(asm, det_weights=det_weights, comps='T'))
     iweights = np.zeros_like(weights)
-    idx = (weights != 0)    
+    idx = (weights != 0)
     iweights[idx] = 1/(weights[idx])
     imap_tiled = (rhs * iweights[0])[0]
 
@@ -63,7 +69,7 @@ def main():
 
     ## Plot ##
     imap[imap==0] = hp.UNSEEN
-    hp.mollview(imap, nest=True)    
+    hp.mollview(imap, nest=True)
     plt.show()
 
     ## Plot gnomview ##
@@ -75,7 +81,7 @@ def main():
     width_pix = int(np.rad2deg(width) * 60. / reso) * 2
     hp.gnomview(imap, nest=True, rot=center, xsize=width_pix*1.4, reso=reso)
     plt.show()
-    
+
 
 ######## Helper functions for tiled healpix maps ########
 
diff --git a/python/proj/wcs.py b/python/proj/wcs.py
index b8375a9..05fd55e 100644
--- a/python/proj/wcs.py
+++ b/python/proj/wcs.py
@@ -386,8 +386,9 @@ def get_active_tiles(self, assembly, assign=False):
                 group_tiles[idx].append(_t)
             imax = np.argmax(group_n)
             max_ratio = group_n[imax] / np.mean(np.concatenate([group_n[:imax], group_n[imax+1:]]))
-            if len(group_n)>1 and max_ratio > 1.1:
-                print(f"Warning: Threads poorly balanced. Max/mean hits = {max_ratio}")
+            # if len(group_n)>1 and max_ratio > 1.1:
+            #     print(f"Warning: Threads poorly balanced. Max/mean hits = {max_ratio}")
+
             # Now paint them into Ranges.
             R = projeng.tile_ranges(q_native, assembly.dets, group_tiles)
             R = wrap_ivals(R)
@@ -704,6 +705,19 @@ def for_healpix(cls, nside, interpol=None, ordering='NEST', nside_tile=None, act
 
         return self
 
+    def get_active_tiles(self, assembly, assign=False):
+        if self.nside_tile == 'auto':
+            nside_tile0 = 4  # ntile = 192, for estimating fsky
+            nActivePerThread = 5 # How many active pixels do you want per thread
+            self.nside_tile = nside_tile0
+            nActive = len(self.get_active_tiles(assembly)['active_tiles'])
+            fsky = nActive / (12 * nside_tile0**2)
+            nThreads = so3g.useful_info()['omp_num_threads']
+            # nside_tile is smallest power of 2 satisfying nTile >= nActivePerThread * nthread / fsky
+            self.nside_tile = int(2**np.ceil(0.5 * np.log2(nActivePerThread * nThreads / (12 * fsky))))
+            #print('Setting nside_tile=', self.nside_tile)
+        return super().get_active_tiles(assembly, assign)
+
     def get_coords(self, assembly, use_native=False, output=None):
         projeng = self.get_ProjEng('TQU')
         if use_native: