@@ -159,7 +159,7 @@ def _array_types_for_method(cls, method_name):
159159        if  method_name  ==  'bulk_fill_timedep_dchi2' : return  ('p' ,)  # just an additional parameter vector 
160160        return  super ()._array_types_for_method (method_name )
161161
162-     def  __init__ (self , model = None , max_cache_size = 0 , num_atoms = None , processor_grid = None , param_blk_sizes = None ,
162+     def  __init__ (self , model = None , max_cache_size = None , num_atoms = None , processor_grid = None , param_blk_sizes = None ,
163163                 derivative_eps = 1e-7 , hessian_eps = 1e-5 ):
164164        #super().__init__(model, num_atoms, processor_grid, param_blk_sizes) 
165165        _DistributableForwardSimulator .__init__ (self , model , num_atoms , processor_grid , param_blk_sizes )
@@ -195,7 +195,9 @@ def copy(self):
195195                                   self ._processor_grid , self ._pblk_sizes )
196196
197197    def  create_layout (self , circuits , dataset = None , resource_alloc = None , array_types = ('E' ,),
198-                       derivative_dimensions = None , verbosity = 0 , layout_creation_circuit_cache = None ):
198+                       derivative_dimensions = None , verbosity = 0 , layout_creation_circuit_cache = None ,
199+                       circuit_partition_cost_functions = ('size' , 'propagations' ),
200+                       load_balancing_parameters = (1.15 ,.1 )):
199201        """ 
200202        Constructs a circuit-outcome-probability-array (COPA) layout for a list of circuits. 
201203
@@ -226,11 +228,22 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types
226228            Determines how much output to send to stdout.  0 means no output, higher 
227229            integers mean more output. 
228230         
229-         layout_creation_circuit_cache: 
230-             A precomputed dictionary serving as a cache for completed 
231-             circuits. I.e. circuits with prep labels and POVM labels appended. 
232-             Along with other useful pre-computed circuit structures used in layout 
233-             creation. 
231+         layout_creation_circuit_cache : dict, optional (default None) 
232+             A precomputed dictionary serving as a cache for completed circuits (i.e., circuits
233+             with prep labels and POVM labels appended), along with other useful precomputed
234+             circuit structures used in layout creation. 
235+ 
236+         circuit_partition_cost_functions : tuple of str, optional (default ('size', 'propagations')) 
237+             A tuple of strings denoting the cost function to use in each of the two stages of the algorithm 
238+             for determining the partitions of the complete circuit set amongst atoms. 
239+             Allowed options are 'size', which corresponds to balancing the number of circuits,  
240+             and 'propagations', which corresponds to balancing the number of state propagations. 
241+ 
242+         load_balancing_parameters : tuple of floats, optional (default (1.15, .1)) 
243+             A tuple of floats used as load balancing parameters when splitting a layout across atoms, 
244+             as in the multi-processor setting when using MPI. These parameters correspond to the `imbalance_threshold` 
245+             and `minimum_improvement_threshold` parameters described in the method `find_splitting_new` 
246+             of the `PrefixTable` class. 
234247
235248        Returns 
236249        ------- 
@@ -256,15 +269,15 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types
256269                raise  MemoryError ("Attempted layout creation w/memory limit = %g <= 0!"  %  mem_limit )
257270            printer .log ("Layout creation w/mem limit = %.2fGB"  %  (mem_limit  *  C ))
258271
259-         #Start with how we'd like to split processors up (without regard to memory limit): 
260- 
261-         # when there are lots of processors, the from_vector calls dominante over the actual fwdsim, 
262-         # but we can reduce from_vector calls by having np1, np2 > 0 (each param requires a from_vector 
263-         # call when using finite diffs) - so we want to choose nc = Ng < nprocs and np1 > 1 (so nc * np1 = nprocs). 
264-         #work_per_proc = self.model.dim**2 
272+         #Start with how we'd like to split processors up (without regard to memory limit):         
273+         #The current implementation of map should benefit more from matching the number of atoms 
274+         #to the number of processors, at least for up to around two qubits. 
275+         default_natoms  =  nprocs  # heuristic 
276+         #TODO: factor in the mem_limit value to more intelligently set the default number of atoms. 
265277
266278        natoms , na , npp , param_dimensions , param_blk_sizes  =  self ._compute_processor_distribution (
267-             array_types , nprocs , num_params , len (circuits ), default_natoms = 2  *  self .model .dim )  # heuristic? 
279+             array_types , nprocs , num_params , len (circuits ), default_natoms = default_natoms )  
280+         
268281        printer .log (f'Num Param Processors { npp }  )
269282
270283        printer .log ("MapLayout: %d processors divided into %s (= %d) grid along circuit and parameter directions."  % 
@@ -273,8 +286,9 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types
273286        assert (_np .prod ((na ,) +  npp ) <=  nprocs ), "Processor grid size exceeds available processors!" 
274287
275288        layout  =  _MapCOPALayout (circuits , self .model , dataset , self ._max_cache_size , natoms , na , npp ,
276-                                 param_dimensions , param_blk_sizes , resource_alloc , verbosity , 
277-                                 layout_creation_circuit_cache =  layout_creation_circuit_cache )
289+                                 param_dimensions , param_blk_sizes , resource_alloc ,circuit_partition_cost_functions ,
290+                                 verbosity , layout_creation_circuit_cache =  layout_creation_circuit_cache ,
291+                                 load_balancing_parameters = load_balancing_parameters )
278292
279293        if  mem_limit  is  not None :
280294            loc_nparams1  =  num_params  /  npp [0 ] if  len (npp ) >  0  else  0 
0 commit comments