reorganize repo in a package style; add documentation

TuftsBCB · Jun 11, 2023 · af2ef5c · af2ef5c
1 parent 9aa9288
commit af2ef5c
Show file tree

Hide file tree

Showing 15 changed files with 153 additions and 63 deletions.
diff --git a/.gitignore b/.gitignore
@@ -9,10 +9,10 @@ beeline_generated.zip
 __pycache__/
 .ipynb_checkpoints
 data_viz_latest/
-bash_scripts.ipynb
-hammond_export_net.ipynb
-final_report.ipynb
-netrexcf.ipynb
-gene_feature_exp.ipynb
-hammond_viz_old.ipynb
+notebooks/bash_scripts.ipynb
+notebooks/hammond_export_net.ipynb
+notebooks/final_report.ipynb
+notebooks/netrexcf.ipynb
+notebooks/gene_feature_exp.ipynb
+notebooks/hammond_viz_old.ipynb
 
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) Hao Zhu <haozhu233@gmail.com>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/dist/grnvae-0.0.1.tar.gz b/dist/grnvae-0.0.1.tar.gz
diff --git a/grnvae/__init__.py b/grnvae/__init__.py
@@ -0,0 +1,9 @@
+from .data import load_beeline
+
+from .evaluate import get_metrics, extract_edges
+
+from .logger import LightLogger, load_logger
+
+from .models import GRNVAE
+
+from .runner import DEFAULT_DEEPSEM_CONFIGS, DEFAULT_GRNVAE_CONFIGS, runGRNVAE
diff --git a/data.py → grnvae/data.py b/data.py → grnvae/data.py
diff --git a/evaluate.py → grnvae/evaluate.py b/evaluate.py → grnvae/evaluate.py
@@ -42,50 +42,6 @@ def get_metrics(A, ground_truth):
     return {'AUPR': AUPR, 'AUPRR': AUPRR, 
             'EP': EP, 'EPR': EPR}
 
-# def top_k_filter(A, evaluate_mask, topk):
-#     A= abs(A)
-#     if evaluate_mask is None:
-#         evaluate_mask = np.ones_like(A) - np.eye(len(A))
-#     A = A * evaluate_mask
-#     A_val = list(np.sort(abs(A.reshape(-1, 1)), 0)[:, 0])
-#     A_val.reverse()
-#     cutoff_all = A_val[topk]
-#     A_above_cutoff = np.zeros_like(A)
-#     A_above_cutoff[abs(A) > cutoff_all] = 1
-#     return A_above_cutoff
-
-# def get_epr(A, ground_truth):
-#     ''' Calculate EPR
-
-#     Calculate EPR given predicted adjacency matrix and BEELINE 
-#     ground truth
-
-#     Parameters
-#     ----------
-#     A: numpy.array 
-#         Predicted adjacency matrix. Expected size is |g| x |g|.
-#     ground_truth: tuple
-#         BEELINE ground truth object exported by 
-#         data.load_beeline_ground_truth. It's a tuple with the 
-#         first element being truth_edges and second element being
-#         evaluate_mask.
-
-#     Returns
-#     -------
-#     tuple
-#         A tuple with calculated EP (in counts) and EPR
-#     '''
-#     eval_flat_mask, y_true, truth_edges, evaluate_mask = ground_truth
-#     num_nodes = A.shape[0]
-#     num_truth_edges = len(truth_edges)
-#     A_above_cutoff = top_k_filter(A, evaluate_mask, num_truth_edges)
-#     idx_source, idx_target = np.where(A_above_cutoff)
-#     A_edges = set(zip(idx_source, idx_target))
-#     overlap_A = A_edges.intersection(truth_edges)
-#     EP = len(overlap_A)
-#     EPR = 1. * EP / ((num_truth_edges ** 2) / np.sum(evaluate_mask))
-#     return EP, EPR
-
 def extract_edges(A, gene_names=None, TFmask=None, threshold=0.0):
     '''Extract predicted edges
     

diff --git a/logger.py → grnvae/logger.py b/logger.py → grnvae/logger.py
@@ -5,9 +5,38 @@
 import numpy as np
 
 class LightLogger:
-    """
+    ''' A lightweight logger that runs entirely locally
     
-    """
+    This logger takes inspiration from Weights & Biases (w&b) but runs
+    entirely in the local environment. It also supports logging multiple
+    separate runs in a single experiment.
+    
+    Parameters
+    ----------
+    result_dir: str
+        Path to the dir to save all the logging files
+    log_date: str
+        Within result_dir, logs from each date are saved in their own
+        subdirectory. The log_date argument provides a way to customize
+        the name of that subdirectory.
+        
+    Methods
+    -------
+    set_configs(configs)
+        Store the experiment configurations (a Python dictionary) in memory
+        for later export
+    start(note=None)
+        Start the logging of a new run within an experiment
+    log(log_dict, step=None)
+        Log `log_dict` (a dictionary containing performance) at each step
+    finish(save_now=True)
+        End the logging of a run and save to a local file if `save_now` is 
+        True
+    to_df(tidy=True)
+        Convert saved logs to a pandas dataframe
+    save(path)
+        Save all the logs to path
+    '''
 
     def __init__(self, result_dir='result_logs', log_date=None):
         if log_date is None:
@@ -76,16 +105,15 @@ def save(self, path):
         export['logging_vars'] = list(self.logging_vars)
         with open(path, 'w') as f:
             json.dump(export, f)
-
-    def delete_batch(self, batch_name, filter_field='experiment_name'):
-        to_delete = []
-        for k in self.mem.keys():
-            if self.mem[k][filter_field] == batch_name:
-                to_delete.append(k)
-        for k in to_delete:
-            del self.mem[k]
 
 def load_logger(path):
+    ''' Load a saved log file into a LightLogger object
+    
+    Parameters
+    ----------
+    path: str
+        Path to the JSON file generated by LightLogger.save.
+    '''
     with open(path, 'r') as f:
         logger_import = json.load(f)
     log_date = logger_import['log_dir'].replace(logger_import['result_dir']+'/', '')

diff --git a/models.py → grnvae/models.py b/models.py → grnvae/models.py
@@ -19,6 +19,49 @@ def forward(self, x):
         return self.l3(out2)
 
 class GRNVAE(nn.Module):
+    ''' A GRN-VAE model
+    
+    Parameters
+    ----------
+    n_gene: int
+        Number of genes
+    hidden_dim: int
+        Size of dimension in the MLP layers
+    z_dim: int
+        Size of dimension of Z
+    A_dim: int
+        Number of adjacency matrices to be modeled at the same time
+    activation: function
+        A pytorch activation layer
+    train_on_non_zero: bool
+        Whether to train on non-zero data only
+    dropout_augmentation_p: float
+        Probability of augmented dropout. For example, 0.1 means that
+        10% of the data will be temporarily set to zero in each forward
+        pass
+    dropout_augmentation_type: str
+        One of 'all' (default), 'belowmean', or 'belowhalfmean'. This
+        option specifies where dropout augmentation is applied. If
+        'belowmean' is selected, the augmentation is only applied to
+        values below the global mean.
+    pretrained_A: torch.Tensor
+        A customized initialization of A instead of random initialization. 
+        
+    Methods
+    -------
+    get_adj_
+        Obtain current adjacency matrix 
+    get_adj
+        Obtain current adjacency matrix as a detached numpy array
+    I_minus_A
+        Calculate I - A
+    reparameterization(z_mu, z_sigma)
+        Reparameterization trick used in VAE
+    dropout_augmentation(x, global_mean)
+        Randomly add dropout noise to the original expression data
+    forward(x, global_mean, global_std, use_dropout_augmentation)
+        Forward pass
+    '''
     def __init__(
         self, n_gene, hidden_dim=128, z_dim=1, A_dim=1, 
         activation=nn.Tanh, train_on_non_zero=False, 

diff --git a/runner.py → grnvae/runner.py b/runner.py → grnvae/runner.py
@@ -116,12 +116,14 @@ def runGRNVAE(exp_array, configs,
     logger: LightLogger or None
         Either a predefined logger or None to start a new one. This 
         logger contains metric information logged during training. 
+    progress_bar: bool
+        Whether to display a progress bar on epochs. 
         
     Returns
     -------
-    torch.Module
-        A GRNVAE module object. You can export the adjacency matrix
-        using its get_adj() method. 
+    (torch.nn.Module, list)
+        This function returns a tuple of the trained model and a list of
+        the adjacency matrices recorded at all evaluation points.
     '''
     if configs['early_stopping'] != 0 and configs['train_split'] == 1.0:
         raise Exception(

diff --git a/bash_beeline.sh → notebooks/bash_beeline.sh b/bash_beeline.sh → notebooks/bash_beeline.sh
diff --git a/bash_hammond.sh → notebooks/bash_hammond.sh b/bash_hammond.sh → notebooks/bash_hammond.sh
diff --git a/exp_beeline.py → notebooks/exp_beeline.py b/exp_beeline.py → notebooks/exp_beeline.py
diff --git a/exp_hammond.py → notebooks/exp_hammond.py b/exp_hammond.py → notebooks/exp_hammond.py
diff --git a/getting_started.ipynb → notebooks/getting_started.ipynb b/getting_started.ipynb → notebooks/getting_started.ipynb
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,33 @@
+[build-system]
+build-backend = "flit_core.buildapi"
+requires = ["flit_core >=3.4,<4", "setuptools_scm"]
+
+[project]
+name = "grnvae"
+authors = [
+    { name = "Hao Zhu", email="haozhu233@gmail.com"},
+    { name = "Donna Slonim", email="donna.slonim@tufts.edu"}
+]
+maintainers = [
+    { name = "Hao Zhu", email = "haozhu233@gmail.com" }
+]
+license = {file = "LICENSE"}
+description = "Improving GRN Inference using Dropout Augmentation"
+version = "0.0.1"
+requires-python = ">=3.7"
+classifiers = ["License :: OSI Approved :: MIT License"]
+
+dependencies = [
+    "numpy>=1.16.5",
+    "pandas>=1.1.1",
+    "scipy>1.4",
+    "h5py>=3",
+    "natsort",
+    "packaging>=20",
+    "anndata",
+    "scikit-learn",
+    "networkx"
+]
+
+[project.urls]
+Home = "https://github.com/haozhu233/grnkit"