sandialabs
diff --git a/‎pygsti/algorithms/core.py‎
Lines changed: 4 additions & 5 deletions b/‎pygsti/algorithms/core.py‎
Lines changed: 4 additions & 5 deletions
diff --git a/‎pygsti/algorithms/gaugeopt.py‎
Lines changed: 2 additions & 2 deletions b/‎pygsti/algorithms/gaugeopt.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎pygsti/baseobjs/label.py‎
Lines changed: 4 additions & 0 deletions b/‎pygsti/baseobjs/label.py‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎pygsti/circuits/circuit.py‎
Lines changed: 18 additions & 1 deletion b/‎pygsti/circuits/circuit.py‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎pygsti/circuits/cloudcircuitconstruction.py‎
Lines changed: 1 addition & 1 deletion b/‎pygsti/circuits/cloudcircuitconstruction.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pygsti/evotypes/densitymx/effectcreps.cpp‎
Lines changed: 9 additions & 10 deletions b/‎pygsti/evotypes/densitymx/effectcreps.cpp‎
Lines changed: 9 additions & 10 deletions
diff --git a/‎pygsti/evotypes/evotype.py‎
Lines changed: 39 additions & 2 deletions b/‎pygsti/evotypes/evotype.py‎
Lines changed: 39 additions & 2 deletions
diff --git a/‎pygsti/forwardsims/mapforwardsim.py‎
Lines changed: 30 additions & 16 deletions b/‎pygsti/forwardsims/mapforwardsim.py‎
Lines changed: 30 additions & 16 deletions
@@ -31,8 +31,7 @@
 from pygsti.modelmembers import states as _state
 from pygsti.circuits.circuitlist import CircuitList as _CircuitList
 from pygsti.baseobjs.resourceallocation import ResourceAllocation as _ResourceAllocation
-from pygsti.optimize.customlm import CustomLMOptimizer as _CustomLMOptimizer
-from pygsti.optimize.customlm import Optimizer as _Optimizer
+from pygsti.optimize.simplerlm import Optimizer as _Optimizer, SimplerLMOptimizer as _SimplerLMOptimizer
 from pygsti import forwardsims as _fwdsims
 from pygsti import layouts as _layouts
 
@@ -619,7 +618,7 @@ def run_gst_fit_simple(dataset, start_model, circuits, optimizer, objective_func
     model : Model
         the best-fit model.
     """
-    optimizer = optimizer if isinstance(optimizer, _Optimizer) else _CustomLMOptimizer.cast(optimizer)
+    optimizer = optimizer if isinstance(optimizer, _Optimizer) else _SimplerLMOptimizer.cast(optimizer)
     objective_function_builder = _objfns.ObjectiveFunctionBuilder.cast(objective_function_builder)
     array_types = optimizer.array_types + \
         objective_function_builder.compute_array_types(optimizer.called_objective_methods, start_model.sim)
@@ -666,7 +665,7 @@ def run_gst_fit(mdc_store, optimizer, objective_function_builder, verbosity=0):
     objfn_store : MDCObjectiveFunction
         the objective function and store containing the best-fit model evaluated at the best-fit point.
     """
-    optimizer = optimizer if isinstance(optimizer, _Optimizer) else _CustomLMOptimizer.cast(optimizer)
+    optimizer = optimizer if isinstance(optimizer, _Optimizer) else _SimplerLMOptimizer.cast(optimizer)
     comm = mdc_store.resource_alloc.comm
     profiler = mdc_store.resource_alloc.profiler
     printer = VerbosityPrinter.create_printer(verbosity, comm)
@@ -843,7 +842,7 @@ def iterative_gst_generator(dataset, start_model, circuit_lists,
           (an "evaluated" model-dataset-circuits store).
     """
     resource_alloc = _ResourceAllocation.cast(resource_alloc)
-    optimizer = optimizer if isinstance(optimizer, _Optimizer) else _CustomLMOptimizer.cast(optimizer)
+    optimizer = optimizer if isinstance(optimizer, _Optimizer) else _SimplerLMOptimizer.cast(optimizer)
     comm = resource_alloc.comm
     profiler = resource_alloc.profiler
     printer = VerbosityPrinter.create_printer(verbosity, comm)
 
@@ -290,7 +290,7 @@ def gaugeopt_custom(model, objective_fn, gauge_group=None,
     gaugeGroupEl = gauge_group.compute_element(x0)  # re-used element for evals
 
     def _call_objective_fn(gauge_group_el_vec, oob_check=False):
-        # Note: oob_check can be True if oob_check_interval>=1 is given to the custom_leastsq below
+        # Note: oob_check can be True if oob_check_interval>=1 is given to the simplish_leastsq below
         gaugeGroupEl.from_vector(gauge_group_el_vec)
         return objective_fn(gaugeGroupEl, oob_check)
 
@@ -309,7 +309,7 @@ def _call_jacobian_fn(gauge_group_el_vec):
         assert(_call_jacobian_fn is not None), "Cannot use 'ls' method unless jacobian is available"
         ralloc = _baseobjs.ResourceAllocation(comm)  # FUTURE: plumb up a resource alloc object?
         test_f = _call_objective_fn(x0)
-        solnX, converged, msg, _, _, _, _, _ = _opt.custom_leastsq(
+        solnX, converged, msg, _, _, _, _ = _opt.simplish_leastsq(
             _call_objective_fn, _call_jacobian_fn, x0, f_norm2_tol=tol,
             jac_norm_tol=tol, rel_ftol=tol, rel_xtol=tol,
             max_iter=maxiter, resource_alloc=ralloc,
 
@@ -817,6 +817,10 @@ def __reduce__(self):
         # Need to tell serialization logic how to create a new Label since it's derived
         # from the immutable tuple type (so cannot have its state set after creation)
         return (LabelStr, (str(self), self.time), None)
+    
+    def __contains__(self, x):
+        """
+        Substring-style membership test on string representations.
+
+        Parameters
+        ----------
+        x : object
+            The value tested for membership; it is converted with `str`.
+
+        Returns
+        -------
+        bool
+            True when `str(x)` occurs as a substring of `str(self)`.
+        """
+        # Compare string representations; the tested label is stringified first.
+        return str(x) in str(self)
 
     def to_native(self):
         """
 
@@ -549,6 +549,24 @@ def _copy_init(self, labels, line_labels, editable, name='', stringrep=None, occ
 
         return self
 
+    #pickle management functions
+    def __getstate__(self):
+        """
+        Return the state used when pickling this object.
+
+        Returns
+        -------
+        dict
+            This object's `__dict__` itself (not a copy).
+        """
+        state_dict = self.__dict__
+        # Disabled alternative, kept for reference: drop the cached hash here and
+        # let it be recomputed at unpickling time.
+        #if state_dict.get('_hash', None) is not None:
+        #    del state_dict['_hash'] #don't store the hash, recompute at unpickling time
+        # NOTE(review): state_dict aliases self.__dict__, so re-enabling the deletion
+        # above would also remove '_hash' from the live object -- copy first.
+        return state_dict
+
+    def __setstate__(self, state_dict):
+        """
+        Restore this object's attributes from a state dictionary.
+
+        Every item of `state_dict` is copied into `self.__dict__`.  If the restored
+        `_static` attribute is truthy, `_hash` is then recomputed from `_hashable_tup`.
+
+        Parameters
+        ----------
+        state_dict : dict
+            Attribute names mapped to values, as returned by `__getstate__`.
+
+        Returns
+        -------
+        None
+
+        Raises
+        ------
+        KeyError
+            If no `_static` entry is present in `self.__dict__` after restoring.
+        """
+        for k, v in state_dict.items():
+            self.__dict__[k] = v
+        if self.__dict__['_static']:
+            #reinitialize the hash rather than keeping any restored '_hash' value
+            if self.__dict__.get('_hashable_tup', None) is not None:
+                self._hash = hash(self._hashable_tup)
+            else: #legacy support: no usable '_hashable_tup' in the state, so rebuild it from self.tup
+                self._hashable_tup = self.tup
+                self._hash = hash(self._hashable_tup)
+
 
     def to_label(self, nreps=1):
         """
@@ -636,7 +654,6 @@ def layertup(self):
         if self._static:
             return self._labels
         else:
-            #return tuple([to_label(layer_lbl) for layer_lbl in self._labels])
             return tuple([layer_lbl if isinstance(layer_lbl, _Label) 
                           else _Label(layer_lbl) for layer_lbl in self._labels])
     @property
 
@@ -2164,7 +2164,7 @@ def bitstr(num_qubits, bit):
     def invert(bstr):
         return [(0 if x else 1) for x in bstr]
 
-    half = [bitstr(n, k) for k in range(int(_np.ceil(_np.math.log(n, 2))))]
+    half = [bitstr(n, k) for k in range(int(_np.ceil(_np.log2(n))))]
     other_half = [invert(bstr) for bstr in half]
     return half + other_half
 
 
@@ -147,7 +147,7 @@ namespace CReps_densitymx {
 	finalIndx += ((finds >> k) & 1) * 3 * base;
 	base = base >> 2; // /= 4 so base == 4**(N-1-k)
       }
-
+      
       //Apply result
       if(parity(finds & _zvals_int))
 	ret -= _abs_elval * state->_dataptr[finalIndx]; // minus sign
@@ -157,15 +157,14 @@ namespace CReps_densitymx {
     return ret;
   }
 
-  INT EffectCRep_Computational::parity(INT x) {
-    // int64-bit specific
-    x = (x & 0x00000000FFFFFFFF)^(x >> 32);
-    x = (x & 0x000000000000FFFF)^(x >> 16);
-    x = (x & 0x00000000000000FF)^(x >> 8);
-    x = (x & 0x000000000000000F)^(x >> 4);
-    x = (x & 0x0000000000000003)^(x >> 2);
-    x = (x & 0x0000000000000001)^(x >> 1);
-    return x & 1; // return the last bit (0 or 1)
+  // Returns the parity of the set bits of x: 1 if an odd number of bits are
+  // set, 0 otherwise.  Each step XORs the upper half of the remaining bits
+  // onto the lower half, so bit 0 ends up as the XOR of all bits.
+  // NOTE(review): the initial shift by 32 assumes INT is 64 bits wide -- confirm.
+  inline INT EffectCRep_Computational::parity(INT x) {
+    x ^= (x >> 32);
+    x ^= (x >> 16);
+    x ^= (x >> 8);
+    x ^= (x >> 4);
+    x ^= (x >> 2);
+    x ^= (x >> 1);
+    return x & 1; // Return the last bit (0 or 1); higher bits are discarded
+  }
 
 
 
@@ -1,6 +1,7 @@
 import importlib as _importlib
 
 from . import basereps as _basereps
+from pygsti.baseobjs.statespace import StateSpace as _StateSpace
 
 
 class Evotype(object):
@@ -50,10 +51,46 @@ class Evotype(object):
     }
 
     @classmethod
-    def cast(cls, obj, default_prefer_dense_reps=False):
+    def cast(cls, obj, default_prefer_dense_reps=None, state_space=None):
+        """
+        Cast the specified object to an Evotype with options for default Evotype
+        handling.
+
+        Parameters
+        ----------
+        obj : Evotype or str
+            Object to cast to an Evotype. If already an Evotype the object is simply
+            returned. Otherwise it is converted with `str` and used as the name of the
+            evotype to construct. If the string "default" is passed in then the class's
+            `default_evotype` is used as the name instead. In both string cases the
+            dense-representation preference is determined by the two optional kwargs below.
+
+        default_prefer_dense_reps : None or bool, optional (default None)
+            Flag to indicate preference for dense representation types when casting
+            a string. If None then there is no preference and this will be determined
+            by the optional state_space kwarg, if present (and is False when state_space
+            is also None). Otherwise if a boolean value this selection overrides any
+            logic based on the state space.
+
+        state_space : StateSpace, optional (default None)
+            If not None then the dimension of the state space is used to determine whether
+            or not to prefer the use of dense representation types when not already specified
+            by the default_prefer_dense_reps kwarg. Dense representations are preferred
+            when `state_space.dim` is at most 64.
+        
+        Returns
+        -------
+        Evotype
+
+        Raises
+        ------
+        ValueError
+            If `obj` is not already an Evotype, `default_prefer_dense_reps` is None, and
+            `state_space` is neither None nor a StateSpace.
+        """
         if isinstance(obj, Evotype):
             return obj
-        elif obj == "default":
+        
+        if default_prefer_dense_reps is None:
+            if state_space is None:
+                default_prefer_dense_reps = False #reproduces legacy behavior.
+            else:
+                if not isinstance(state_space, _StateSpace):
+                    raise ValueError('state_space must be a StateSpace object.')
+                default_prefer_dense_reps = False if state_space.dim > 64 else True #HARDCODED threshold: dense reps are preferred only when dim <= 64
+
+        if obj == "default":
             return Evotype(cls.default_evotype, default_prefer_dense_reps)
         else:  # assume obj is a string naming an evotype
             return Evotype(str(obj), default_prefer_dense_reps)
 
@@ -159,7 +159,7 @@ def _array_types_for_method(cls, method_name):
         if method_name == 'bulk_fill_timedep_dchi2': return ('p',)  # just an additional parameter vector
         return super()._array_types_for_method(method_name)
 
-    def __init__(self, model=None, max_cache_size=0, num_atoms=None, processor_grid=None, param_blk_sizes=None,
+    def __init__(self, model=None, max_cache_size=None, num_atoms=None, processor_grid=None, param_blk_sizes=None,
                  derivative_eps=1e-7, hessian_eps=1e-5):
         #super().__init__(model, num_atoms, processor_grid, param_blk_sizes)
         _DistributableForwardSimulator.__init__(self, model, num_atoms, processor_grid, param_blk_sizes)
@@ -195,7 +195,9 @@ def copy(self):
                                    self._processor_grid, self._pblk_sizes)
 
     def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types=('E',),
-                      derivative_dimensions=None, verbosity=0, layout_creation_circuit_cache=None):
+                      derivative_dimensions=None, verbosity=0, layout_creation_circuit_cache=None,
+                      circuit_partition_cost_functions=('size', 'propagations'),
+                      load_balancing_parameters=(1.15,.1)):
         """
         Constructs a circuit-outcome-probability-array (COPA) layout for a list of circuits.
 
@@ -226,11 +228,22 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types
             Determines how much output to send to stdout.  0 means no output, higher
             integers mean more output.
         
-        layout_creation_circuit_cache:
-            A precomputed dictionary serving as a cache for completed
-            circuits. I.e. circuits with prep labels and POVM labels appended.
-            Along with other useful pre-computed circuit structures used in layout
-            creation.
+        layout_creation_circuit_cache : dict, optional (default None)
+            A precomputed dictionary serving as a cache for completed circuits (i.e. circuits
+            with prep labels and POVM labels appended), along with other useful pre-computed
+            circuit structures used in layout creation.
+
+        circuit_partition_cost_functions : tuple of str, optional (default ('size', 'propagations'))
+            A tuple of strings denoting the cost function to use in each of the two stages of the algorithm
+            for determining the partitions of the complete circuit set amongst atoms.
+            Allowed options are 'size', which corresponds to balancing the number of circuits, 
+            and 'propagations', which corresponds to balancing the number of state propagations.
+
+        load_balancing_parameters : tuple of floats, optional (default (1.15, .1))
+            A tuple of floats used as load balancing parameters when splitting a layout across atoms,
+            as in the multi-processor setting when using MPI. These parameters correspond to the `imbalance_threshold`
+            and `minimum_improvement_threshold` parameters described in the method `find_splitting_new`
+            of the `PrefixTable` class.
 
         Returns
         -------
@@ -256,15 +269,15 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types
                 raise MemoryError("Attempted layout creation w/memory limit = %g <= 0!" % mem_limit)
             printer.log("Layout creation w/mem limit = %.2fGB" % (mem_limit * C))
 
-        #Start with how we'd like to split processors up (without regard to memory limit):
-
-        # when there are lots of processors, the from_vector calls dominante over the actual fwdsim,
-        # but we can reduce from_vector calls by having np1, np2 > 0 (each param requires a from_vector
-        # call when using finite diffs) - so we want to choose nc = Ng < nprocs and np1 > 1 (so nc * np1 = nprocs).
-        #work_per_proc = self.model.dim**2
+        #Start with how we'd like to split processors up (without regard to memory limit):        
+        #The current implementation of map (should) benefit more when the number of atoms matches
+        #the number of processors, at least for systems of up to around two qubits.
+        default_natoms = nprocs # heuristic
+        #TODO: factor in the mem_limit value to more intelligently set the default number of atoms.
 
         natoms, na, npp, param_dimensions, param_blk_sizes = self._compute_processor_distribution(
-            array_types, nprocs, num_params, len(circuits), default_natoms=2 * self.model.dim)  # heuristic?
+            array_types, nprocs, num_params, len(circuits), default_natoms=default_natoms)  
+        
         printer.log(f'Num Param Processors {npp}')
 
         printer.log("MapLayout: %d processors divided into %s (= %d) grid along circuit and parameter directions." %
@@ -273,8 +286,9 @@ def create_layout(self, circuits, dataset=None, resource_alloc=None, array_types
         assert(_np.prod((na,) + npp) <= nprocs), "Processor grid size exceeds available processors!"
 
         layout = _MapCOPALayout(circuits, self.model, dataset, self._max_cache_size, natoms, na, npp,
-                                param_dimensions, param_blk_sizes, resource_alloc, verbosity, 
-                                layout_creation_circuit_cache= layout_creation_circuit_cache)
+                                param_dimensions, param_blk_sizes, resource_alloc,circuit_partition_cost_functions,
+                                verbosity, layout_creation_circuit_cache= layout_creation_circuit_cache,
+                                load_balancing_parameters=load_balancing_parameters)
 
         if mem_limit is not None:
             loc_nparams1 = num_params / npp[0] if len(npp) > 0 else 0