feat: Add random state feature. #150
Changes from 4 commits
```diff
@@ -4,8 +4,75 @@
 class SNMFOptimizer:
-    def __init__(self, MM, Y0=None, X0=None, A=None, rho=1e12, eta=610, max_iter=500, tol=5e-7, components=None):
-        print("Initializing SNMF Optimizer")
+    """A self-contained implementation of the stretched NMF algorithm (sNMF),
+    including sparse stretched NMF.
+
+    Instantiating the SNMFOptimizer class runs all the analysis immediately.
+    The results matrices can then be accessed as instance attributes
+    of the class (X, Y, and A).
+
+    For more information on sNMF, please reference:
+    Gu, R., Rakita, Y., Lan, L. et al. Stretched non-negative matrix factorization.
+    npj Comput Mater 10, 193 (2024). https://doi.org/10.1038/s41524-024-01377-5
```
Review thread on the class docstring (resolved):

Reviewer: We would normally do a list of class attributes here, covering everything that is set on `self`.

Author: I'm not clear on how I'd distinguish the arguments from the attributes. I understand how they are different semantically, but what part of that is necessary to make clear here? Can you give an example? Those have been helpful.

Reviewer: Everything that is set on `self` is an attribute. Inside a function or method, the parameters are the arguments of the function, so for `__init__` the names in its signature are the arguments.

Reviewer: pls confirm with a thumbs up if you saw this.

Author: Added class attributes. Some of them are for sure redundant, but a full cleanup I will save for the next PR.
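For readers following the thread above, here is a minimal, hypothetical sketch of the numpydoc convention being requested: attributes (everything assigned on `self`) are listed in their own section, separate from the `__init__` parameters. All names below are illustrative, not taken from this PR:

```python
class Example:
    """Short summary of the class.

    Attributes
    ----------
    data : ndarray
        Everything assigned as ``self.data = ...`` in ``__init__`` (or later)
        is an attribute and belongs in this section.
    n_iter : int
        Number of update steps actually performed.
    """

    def __init__(self, data, max_iter=100):
        # `data` and `max_iter` are arguments of __init__;
        # `self.data` and `self.n_iter` are attributes of the instance.
        self.data = data
        self.n_iter = 0
```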
```diff
+    """
+
+    def __init__(
+        self,
+        MM,
+        Y0=None,
+        X0=None,
+        A=None,
+        rho=1e12,
+        eta=610,
+        max_iter=500,
+        tol=5e-7,
+        n_components=None,
+        random_state=None,
+    ):
+        """Initialize an instance of SNMF and run the optimization.
+
+        Parameters
+        ----------
+        MM: ndarray
+            The array containing the data to be decomposed. Shape is
+            (length_of_signal, number_of_conditions).
+        Y0: ndarray
+            The array containing initial guesses for the component weights
+            at each stretching condition. Shape is (number_of_components,
+            number_of_conditions). Must be provided if n_components is not
+            provided. Will override n_components if both are provided.
```
Review thread on the Y0 / n_components interaction (resolved):

Reviewer: Normally we would raise an exception if two conflicting things are provided (we don't want to guess which is the right one), unless there is a good functional reason to do it another way. We like to avoid "magic", and the current behavior of the code could be "magic". Please raise an exception unless there is a strong reason to do otherwise.

Author: Okay. I don't see any reason for them not to match, so now the user will only be allowed to provide one.
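A minimal sketch of the kind of guard being requested; this is an assumption about the eventual fix, not code from this diff, and the helper name is hypothetical:

```python
def _resolve_n_components(Y0, n_components):
    """Hypothetical helper illustrating the requested guard (not in this PR)."""
    # Refuse to guess when both inputs are given.
    if Y0 is not None and n_components is not None:
        raise ValueError("Provide either Y0 or n_components, not both.")
    if Y0 is None and n_components is None:
        raise ValueError("Must provide either Y0 or n_components.")
    # When Y0 is given, its number of rows defines the number of components.
    return n_components if n_components is not None else Y0.shape[0]
```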
```diff
+        X0: ndarray
+            The array containing initial guesses for the intensities of each
+            component per row/sample/angle. Shape is (length_of_signal,
+            number_of_components).
+        A: ndarray
+            The array containing initial guesses for the stretching factor
+            for each component, at each condition. Shape is
+            (number_of_components, number_of_conditions).
+        rho: float
+            A stretching factor that influences the decomposition. Zero
+            corresponds to no stretching present. Relatively insensitive
+            and typically adjusted in powers of 10.
+        eta: float
+            A sparsity factor that influences the decomposition. Should be
+            set to zero for non-sparse data such as PDF. Can be used to
+            improve results for sparse data such as XRD, but due to
+            instability, should be used only after first selecting the best
+            value for rho.
+        max_iter: int
+            The maximum number of times to update each of A, X, and Y before
+            stopping the optimization.
+        tol: float
+            The minimum fractional improvement in the objective function
+            allowed without terminating the optimization. Note that a minimum
+            of 20 updates are run before this parameter is checked.
+        n_components: int
+            The number of components to attempt to extract from MM. Note that
+            this will be overridden by Y0 if that is provided, but it must be
+            provided if no Y0 is provided.
+        random_state: int
+            A reproducible seed for the initial matrices used in the
+            optimization. Due to the non-convex nature of the problem,
+            results may vary even with the same initial guesses, so this does
+            not make the program deterministic.
+        """
+
         self.MM = MM
         self.X0 = X0
         self.Y0 = Y0
```
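To make the shape conventions in the docstring concrete, a small sketch with invented sizes (N points per signal, M conditions, K components):

```python
import numpy as np

N, M, K = 200, 10, 3  # signal length, conditions, components

MM = np.zeros((N, M))  # data to decompose: (length_of_signal, number_of_conditions)
X0 = np.zeros((N, K))  # component intensities: (length_of_signal, number_of_components)
Y0 = np.zeros((K, M))  # component weights: (number_of_components, number_of_conditions)
A = np.ones((K, M))    # stretching factors: (number_of_components, number_of_conditions)
```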
```diff
@@ -15,23 +82,22 @@ def __init__(self, MM, Y0=None, X0=None, A=None, rho=1e12, eta=610, max_iter=500
         # Capture matrix dimensions
         self.N, self.M = MM.shape
         self.num_updates = 0
+        self.rng = np.random.default_rng(random_state)

         if Y0 is None:
-            if components is None:
-                raise ValueError("Must provide either Y0 or a number of components.")
+            if n_components is None:
+                raise ValueError("Must provide either Y0 or n_components.")
             else:
-                self.K = components
-                self.Y0 = np.random.beta(a=2.5, b=1.5, size=(self.K, self.M))  # This is untested
+                self.K = n_components
+                self.Y0 = self.rng.beta(a=2.5, b=1.5, size=(self.K, self.M))
         else:
             self.K = Y0.shape[0]

         # Initialize A, X0 if not provided
         if self.A is None:
-            self.A = np.ones((self.K, self.M)) + np.random.randn(self.K, self.M) * 1e-3  # Small perturbation
+            self.A = np.ones((self.K, self.M)) + self.rng.normal(0, 1e-3, size=(self.K, self.M))
         if self.X0 is None:
-            self.X0 = np.random.rand(self.N, self.K)  # Ensures values in [0,1]
+            self.X0 = self.rng.random((self.N, self.K))

         # Initialize solution matrices to be iterated on
         self.X = np.maximum(0, self.X0)
         self.Y = np.maximum(0, self.Y0)
```
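To illustrate what the new parameter buys, here is a hedged usage sketch, assuming the interface shown in this diff; the import path is an assumption, and the data are fake:

```python
import numpy as np

# Assumed import path; the actual module name may differ.
from snmf import SNMFOptimizer

# Fake data: 200-point signals measured under 10 conditions.
MM = np.abs(np.random.default_rng(0).normal(size=(200, 10)))

# Same seed -> same random initial X0, Y0, and A perturbation.
run1 = SNMFOptimizer(MM, n_components=3, random_state=42)
run2 = SNMFOptimizer(MM, n_components=3, random_state=42)

# The initial guesses are now reproducible; per the docstring, the final
# X, Y, A may still differ slightly because the problem is non-convex.
print(run1.X.shape, run1.Y.shape, run1.A.shape)
```

Note that `np.random.default_rng(None)` seeds from fresh OS entropy, so omitting `random_state` preserves the old non-deterministic behavior.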
Review thread on the class-vs-function design:

Reviewer: Looking at this, it seems that we have already instantiated some kind of SNMF class, and then this is doing the optimization. Is there a particular reason why we make this a class and not a function? It feels much more like a function to me. Could you think about what the downsides are of making it a function? Is scikit-learn doing something like this too?

Author: What scikit-learn has is an NMF class which instantiates, and then has the standard set of model class members like fit_transform and such. But to actually "do the math", NMF internally makes a call to non_negative_factorization. So the math should probably be a function, but unless (and until) we are literally merging this into scikit-learn, it's worth having a class. But instantiating the class should not be running math as it does currently. That's going to be the next thing I tackle, but this PR is getting a little long so I'm holding off.

Reviewer: I don't see the value in having this as a class, tbh. Can you justify why it is worth having a class for this, when we have already instantiated a higher-level class (I can't easily see in the browser what this class is called) as snmf_class?
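A minimal sketch of the split the author describes, modeled on scikit-learn's NMF / non_negative_factorization pattern. Apart from those two scikit-learn names, everything here is hypothetical, and the function body is a placeholder rather than the real solver:

```python
import numpy as np


def snmf_factorize(MM, n_components, random_state=None, max_iter=500):
    """Hypothetical function holding the math, analogous to sklearn's
    non_negative_factorization. (Placeholder body, not the real solver.)"""
    rng = np.random.default_rng(random_state)
    N, M = MM.shape
    X = rng.random((N, n_components))
    Y = rng.beta(a=2.5, b=1.5, size=(n_components, M))
    A = np.ones((n_components, M))
    # ... iterative updates of X, Y, A would go here ...
    return X, Y, A


class SNMFOptimizer:
    """Hypothetical thin wrapper: __init__ only stores settings,
    and fit() runs the math, like sklearn's NMF.fit_transform."""

    def __init__(self, n_components, random_state=None, max_iter=500):
        self.n_components = n_components
        self.random_state = random_state
        self.max_iter = max_iter

    def fit(self, MM):
        self.X, self.Y, self.A = snmf_factorize(
            MM, self.n_components, self.random_state, self.max_iter
        )
        return self
```

The design point in the thread is exactly this separation: the class carries configuration and results, while a plain function owns the computation, so instantiation no longer triggers the optimization.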