CahanLab
diff --git a/setup.py b/setup.py
Lines changed: 4 additions & 1 deletion
diff --git a/src/pySingleCellNet/__init__.py b/src/pySingleCellNet/__init__.py
Lines changed: 9 additions & 6 deletions
diff --git a/src/pySingleCellNet/__version__.py b/src/pySingleCellNet/__version__.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/pySingleCellNet/config.py b/src/pySingleCellNet/config.py
Lines changed: 5 additions & 1 deletion
diff --git a/src/pySingleCellNet/plotting/__init__.py b/src/pySingleCellNet/plotting/__init__.py
Lines changed: 4 additions & 0 deletions
diff --git a/src/pySingleCellNet/plotting/bar.py b/src/pySingleCellNet/plotting/bar.py
Lines changed: 39 additions & 31 deletions
diff --git a/src/pySingleCellNet/plotting/dot.py b/src/pySingleCellNet/plotting/dot.py
Lines changed: 47 additions & 1 deletion
@@ -30,7 +30,10 @@ def get_version(package_name):
         'umap-learn',
         'mygene',
         'palettable',
-        'gseapy'
+        'gseapy',
+        'alive_progress',
+        'python-igraph',
+        'marsilea'
     ],
     project_urls={
         'Documentation': 'https://pysinglecellnet.readthedocs.io/en/latest/',
 
@@ -1,29 +1,32 @@
 """PySingleCellNet"""
 
+from .config import SCN_CATEGORY_COLOR_DICT 
+from .config import SCN_DIFFEXP_KEY
 from . import plotting as pl
+from . import utils as ut
 from .stats import *
-from .utils import *
 from .tsp_rf import *
 from .scn_train import *
-from .scn_assess import *
+from .scn_assess import create_classifier_report
 from .postclass_analysis import *
 from .rank_class import *
-from .config import SCN_CATEGORY_COLOR_DICT 
+
 
 
 # Public API
 __all__ = [
     "__version__",
     "pl",
-    "mito_rib",
-    "limit_anndata_to_common_genes",
-    "splitCommonAnnData",
+    "ut",
+    "train_classifier",
     "scn_train",
     "scn_classify",
     "graph_from_nodes_and_edges",
     "comp_ct_thresh",
     "class_by_threshold",
     "determine_relationships"
+    "remove_xist_y_genes",
+    "create_classifier_report"
 ]    
 
 
@@ -1 +1 @@
-__version__ = '0.1.2'
+__version__ = '0.1.3'
@@ -19,4 +19,8 @@
 
 # fonts
 
-# ...
+# ...
+
+# Arbitrary strings
+
+SCN_DIFFEXP_KEY = "scnDiffExp"
@@ -7,6 +7,7 @@
 )
 
 from .dot import (
+    umi_counts_ranked,
     ontogeny_graph,
     dotplot_deg,
     dotplot_diff_gene,
@@ -15,6 +16,7 @@
 )
 
 from .heatmap import (
+    heatmap_classifier_report,
     heatmap_scores,
     heatmap_gsea,
     heatmap_genes,
@@ -31,11 +33,13 @@
     "stackedbar_categories",
     "stackedbar_categories_list",
     "bar_classifier_f1",
+    "umi_counts_ranked",
     "ontogeny_graph",
     "dotplot_deg",
     "dotplot_diff_gene",
     "dotplot_scn_scores",
     "umap_scores",
+    "heatmap_classifier_report",
     "heatmap_scores",
     "heatmap_gsea",
     "heatmap_genes",
 
@@ -19,26 +19,29 @@
 from anndata import AnnData
 from scipy.sparse import csr_matrix
 from sklearn.metrics import f1_score
+
 # from ..utils import *
 from pySingleCellNet.config import SCN_CATEGORY_COLOR_DICT
 
-
-import numpy as np
-import matplotlib.pyplot as plt
+from scipy.spatial.distance import pdist, squareform
+from scipy.cluster.hierarchy import linkage, leaves_list
 from anndata import AnnData
 
+
 def stackedbar_composition(
     adata: AnnData, 
     groupby: str, 
     obs_column='SCN_class', 
     labels=None, 
     bar_width: float = 0.75, 
     color_dict=None, 
-    ax=None
+    ax=None,
+    order_by_similarity: bool = False,
+    similarity_metric: str = 'correlation'
 ):
     """
     Plots a stacked bar chart of cell type proportions for a single AnnData object grouped by a specified column.
-
+    
     Args:
         adata (anndata.AnnData): An AnnData object.
         groupby (str): The column in `.obs` to group by.
@@ -50,39 +53,36 @@ def stackedbar_composition(
         color_dict (Dict[str, str], optional): A dictionary mapping categories to specific colors. If not provided,
             default colors will be used.
         ax (matplotlib.axes.Axes, optional): The axis to plot on. If not provided, a new figure and axis will be created.
-
+        order_by_similarity (bool, optional): Whether to order the bars by similarity in composition. Defaults to False.
+        similarity_metric (str, optional): The metric to use for similarity ordering. Defaults to 'correlation'.
+    
     Raises:
         ValueError: If the length of `labels` does not match the number of unique groups.
-
+    
     Examples:
         >>> stackedbar_composition(adata, groupby='sample', obs_column='your_column_name')
         >>> fig, ax = plt.subplots()
         >>> stackedbar_composition(adata, groupby='sample', obs_column='your_column_name', ax=ax)
     """
-    
     # Ensure the groupby column exists in .obs
     if groupby not in adata.obs.columns:
         raise ValueError(f"The groupby column '{groupby}' does not exist in the .obs attribute.")
 
-   
     # Check if groupby column is categorical or not
     if pd.api.types.is_categorical_dtype(adata.obs[groupby]):
         unique_groups = adata.obs[groupby].cat.categories.to_list()
     else:
         unique_groups = adata.obs[groupby].unique().tolist()
-
+    
     # Extract unique groups and ensure labels are provided or create default ones
-    unique_groups = adata.obs[groupby].cat.categories.to_list()
-
-
     if labels is None:
         labels = unique_groups
     elif len(labels) != len(unique_groups):
         raise ValueError("Length of 'labels' must match the number of unique groups.")
 
     if color_dict is None:
         color_dict = adata.uns['SCN_class_colors'] 
-
+    
     # Extracting category proportions per group
     category_counts = []
     categories = set()
@@ -101,12 +101,21 @@ def stackedbar_composition(
             j = categories.index(category)
             proportions[j, i] = counts[category]
 
+    # Ordering groups by similarity if requested
+    if order_by_similarity:
+        dist_matrix = pdist(proportions.T, metric=similarity_metric)
+        linkage_matrix = linkage(dist_matrix, method='average')
+        order = leaves_list(linkage_matrix)
+        proportions = proportions[:, order]
+        unique_groups = [unique_groups[i] for i in order]
+        labels = [labels[i] for i in order]
+    
     # Plotting
     if ax is None:
         fig, ax = plt.subplots()
     else:
         fig = ax.figure
-
+    
     bottom = np.zeros(len(unique_groups))
     for i, category in enumerate(categories):
         color = color_dict[category] if color_dict and category in color_dict else None
@@ -135,7 +144,9 @@ def stackedbar_composition(
         return ax
 
 
-def stackedbar_composition2(
+
+
+def stackedbar_composition_old(
     adata: AnnData, 
     groupby: str, 
     obs_column = 'SCN_class', 
@@ -332,25 +343,28 @@ def stackedbar_categories(
     adata: AnnData,
     scn_classes_to_display = None, 
     bar_height=0.8,
-    color_dict = None
+    color_dict = None,
+    class_col_name = 'SCN_class_argmax',
+    category_col_name = 'SCN_class_type'
 ):
     # Copy the obs DataFrame to avoid modifying the original data
     df = adata.obs.copy()
 
     # Ensure the columns 'SCN_class' and 'SCN_class_type' exist
-    if 'SCN_class' not in df.columns or 'SCN_class_type' not in df.columns:
-        raise KeyError("Columns 'SCN_class' and 'SCN_class_type' must be present in adata.obs")
+    # if 'SCN_class' not in df.columns or 'SCN_class_type' not in df.columns:
+    if class_col_name not in df.columns or category_col_name not in df.columns:
+        raise KeyError(f"Columns '{class_col_name}' and '{category_col_name}' must be present in adata.obs")
 
     # Ensure SCN_class categories are consistent
-    df['SCN_class'] = df['SCN_class'].astype('category')
-    df['SCN_class_type'] = df['SCN_class_type'].astype('category')
+    df[class_col_name] = df[class_col_name].astype('category')
+    df[category_col_name] = df[category_col_name].astype('category')
 
-    df['SCN_class'] = df['SCN_class'].cat.set_categories(df['SCN_class'].cat.categories)
-    df['SCN_class_type'] = df['SCN_class_type'].cat.set_categories(df['SCN_class_type'].cat.categories)
+    df[class_col_name] = df[class_col_name].cat.set_categories(df[class_col_name].cat.categories)
+    df[category_col_name] = df[category_col_name].cat.set_categories(df[category_col_name].cat.categories)
 
     # Group by 'SCN_class' and get value counts for 'SCN_class_type'
     try:
-        counts = df.groupby('SCN_class')['SCN_class_type'].value_counts().unstack().fillna(0)
+        counts = df.groupby(class_col_name)[category_col_name].value_counts().unstack().fillna(0)
     except Exception as e:
         print("Error during groupby and value_counts operations:", e)
         return
@@ -362,7 +376,7 @@ def stackedbar_categories(
     total_counts = counts.sum(axis=1)
     total_percent = (total_counts / total_counts.sum() * 100).round(1)  # Converts to percentage and round
 
-    all_classes = df['SCN_class'].unique()
+    all_classes = df[class_col_name].unique()
     if scn_classes_to_display is not None:
         if not all(cls in all_classes for cls in scn_classes_to_display):
             raise ValueError("Some values in 'scn_classes_to_display' do not match available 'SCN_class' values in the provided DataFrames.")
@@ -415,8 +429,6 @@ def stackedbar_categories(
 
 
 
-
-
 def stackedbar_categories_list_old(
     ads, 
     titles=None,
@@ -505,8 +517,6 @@ def stackedbar_categories_list_old(
     return fig
 
 
-
-
 def stackedbar_categories_list(
     ads, 
     titles=None,
@@ -593,8 +603,6 @@ def stackedbar_categories_list(
 
 
 
-
-
 def bar_classifier_f1(adata: AnnData, ground_truth: str = "celltype", class_prediction: str = "SCN_class", bar_height=0.8):
     """
     Plots a bar graph of F1 scores per class based on ground truth and predicted classifications.
 
@@ -23,6 +23,51 @@
 from sklearn.metrics import f1_score
 from ..utils import *
 
+def umi_counts_ranked(adata, total_counts_column="total_counts", show=True):
+    """
+    Identifies and plots the knee point of the UMI count distribution in an AnnData object.
+
+    Parameters:
+        adata (AnnData): The input AnnData object.
+        total_counts_column (str): Column in `adata.obs` containing total UMI counts. Default is "total_counts".
+        show (bool): If True, displays a log-log plot with the knee point. Default is True.
+
+    Returns:
+        float: The UMI count value at the knee point.
+
+    Raises:
+        ValueError: If fewer than two cells are present (no numerical gradient can be computed).
+    """
+    # Sort total UMI counts in descending order, so rank 1 is the cell with the most UMIs
+    sorted_umi_counts = np.sort(adata.obs[total_counts_column])[::-1]
+    if sorted_umi_counts.size < 2:
+        raise ValueError("At least two cells are required to locate a knee point.")
+
+    # Compute cumulative UMI counts (normalized to a fraction)
+    cumulative_counts = np.cumsum(sorted_umi_counts)
+    cumulative_fraction = cumulative_counts / cumulative_counts[-1]
+
+    # Knee = rank with the largest second derivative of the cumulative fraction (NOTE(review): confirm heuristic)
+    second_derivative = np.gradient(np.gradient(cumulative_fraction))
+    knee_idx = np.argmax(second_derivative)
+    knee_point_value = sorted_umi_counts[knee_idx]
+
+    if show:  # log-log rank plot, knee marked by a dashed red vertical line
+        cell_ranks = np.arange(1, len(sorted_umi_counts) + 1)
+        plt.figure(figsize=(10, 6))
+        plt.plot(cell_ranks, sorted_umi_counts, marker='o', markersize=2, linestyle='-', linewidth=0.5, label="UMI Counts")
+        plt.axvline(cell_ranks[knee_idx], color="red", linestyle="--", label=f"Knee Point: {knee_point_value}")
+        plt.title('UMI Counts Per Cell (Log-Log Scale)', fontsize=14)
+        plt.xlabel('Cell Rank (Descending)', fontsize=12)
+        plt.ylabel('Total UMI Counts', fontsize=12)
+        plt.xscale('log')
+        plt.yscale('log')
+        plt.grid(True, linestyle='--', linewidth=0.5)
+        plt.legend()
+        plt.tight_layout()
+        plt.show()
+    return float(knee_point_value)
+
 
 def ontogeny_graph(gra, color_dict): 
     ig.config['plotting.backend'] = 'matplotlib'
@@ -34,7 +79,8 @@ def ontogeny_graph(gra, color_dict):
     v_style["margin"] = (50)
 
     for vertex in gra.vs:
-        vertex["color"] = convert_color(color_dict.get(vertex["name"], np.array([0.5, 0.5, 0.5]))) 
+        # vertex["color"] = convert_color(color_dict.get(vertex["name"], np.array([0.5, 0.5, 0.5])))
+        vertex["color"] = tuple(color_dict.get(vertex["name"], np.array([0.5, 0.5, 0.5])))  
 
     # Normalize node sizes for better visualization
     max_size = 50  # Maximum size for visualization
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -1 +1 @@` |
| `1` | | `-__version__ = '0.1.2'` |
| | `1` | `+__version__ = '0.1.3'` |