Skip to content

Commit

Permalink
reorganize repo in a package style; add documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
haozhu233 committed Jun 11, 2023
1 parent 9aa9288 commit af2ef5c
Show file tree
Hide file tree
Showing 15 changed files with 153 additions and 63 deletions.
12 changes: 6 additions & 6 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@ beeline_generated.zip
__pycache__/
.ipynb_checkpoints
data_viz_latest/
bash_scripts.ipynb
hammond_export_net.ipynb
final_report.ipynb
netrexcf.ipynb
gene_feature_exp.ipynb
hammond_viz_old.ipynb
notebooks/bash_scripts.ipynb
notebooks/hammond_export_net.ipynb
notebooks/final_report.ipynb
notebooks/netrexcf.ipynb
notebooks/gene_feature_exp.ipynb
notebooks/hammond_viz_old.ipynb

19 changes: 19 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Copyright (c) Hao Zhu <haozhu233@gmail.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Binary file added dist/grnvae-0.0.1.tar.gz
Binary file not shown.
9 changes: 9 additions & 0 deletions grnvae/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from .data import load_beeline

from .evaluate import get_metrics, extract_edges

from .logger import LightLogger, load_logger

from .models import GRNVAE

from .runner import DEFAULT_DEEPSEM_CONFIGS, DEFAULT_GRNVAE_CONFIGS, runGRNVAE
File renamed without changes.
44 changes: 0 additions & 44 deletions evaluate.py → grnvae/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,50 +42,6 @@ def get_metrics(A, ground_truth):
return {'AUPR': AUPR, 'AUPRR': AUPRR,
'EP': EP, 'EPR': EPR}

# def top_k_filter(A, evaluate_mask, topk):
# A= abs(A)
# if evaluate_mask is None:
# evaluate_mask = np.ones_like(A) - np.eye(len(A))
# A = A * evaluate_mask
# A_val = list(np.sort(abs(A.reshape(-1, 1)), 0)[:, 0])
# A_val.reverse()
# cutoff_all = A_val[topk]
# A_above_cutoff = np.zeros_like(A)
# A_above_cutoff[abs(A) > cutoff_all] = 1
# return A_above_cutoff

# def get_epr(A, ground_truth):
# ''' Calculate EPR

# Calculate EPR given predicted adjacency matrix and BEELINE
# ground truth

# Parameters
# ----------
# A: numpy.array
# Predicted adjacency matrix. Expected size is |g| x |g|.
# ground_truth: tuple
# BEELINE ground truth object exported by
# data.load_beeline_ground_truth. It's a tuple with the
# first element being truth_edges and second element being
# evaluate_mask.

# Returns
# -------
# tuple
# A tuple with calculated EP (in counts) and EPR
# '''
# eval_flat_mask, y_true, truth_edges, evaluate_mask = ground_truth
# num_nodes = A.shape[0]
# num_truth_edges = len(truth_edges)
# A_above_cutoff = top_k_filter(A, evaluate_mask, num_truth_edges)
# idx_source, idx_target = np.where(A_above_cutoff)
# A_edges = set(zip(idx_source, idx_target))
# overlap_A = A_edges.intersection(truth_edges)
# EP = len(overlap_A)
# EPR = 1. * EP / ((num_truth_edges ** 2) / np.sum(evaluate_mask))
# return EP, EPR

def extract_edges(A, gene_names=None, TFmask=None, threshold=0.0):
'''Extract predicted edges
Expand Down
48 changes: 38 additions & 10 deletions logger.py → grnvae/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,38 @@
import numpy as np

class LightLogger:
"""
''' A lightweight logger that runs completely locally
"""
This logger takes inspiration from W&B but runs completely in a local
environment. It also supports logging multiple separate runs in
a single experiment.
Parameters
----------
result_dir: str
Path to the dir to save all the logging files
log_date: str
Within result_dir, the logs of each date are saved in their own
subdirectory. The log_date argument lets you customize the name of
that subdirectory.
Methods
-------
set_configs(configs)
Save experiment configurations (a python dictionary) to memory for
future exportation
start(note=None)
Start the logging of a new run within an experiment
log(log_dict, step=None)
Log `log_dict` (a dictionary containing performance) at each step
finish(save_now=True)
End the logging of a run and save to a local file if `save_now` is
True
to_df(tidy=True)
Convert saved logs to a pandas dataframe
save(path)
Save all the logs to path
'''

def __init__(self, result_dir='result_logs', log_date=None):
if log_date is None:
Expand Down Expand Up @@ -76,16 +105,15 @@ def save(self, path):
export['logging_vars'] = list(self.logging_vars)
with open(path, 'w') as f:
json.dump(export, f)

def delete_batch(self, batch_name, filter_field='experiment_name'):
    ''' Remove every logged run that belongs to a given batch

    Parameters
    ----------
    batch_name: object
        Value to match. A run is removed when its `filter_field` entry
        equals this value.
    filter_field: str
        Key looked up in each run stored in `self.mem`.

    Raises
    ------
    KeyError
        If a stored run has no `filter_field` entry.
    '''
    # Collect the matching keys first: deleting entries from a dict while
    # iterating over it raises a RuntimeError.
    to_delete = [k for k, run in self.mem.items()
                 if run[filter_field] == batch_name]
    for k in to_delete:
        del self.mem[k]

def load_logger(path):
''' Load a saved log file to a LightLogger object
Parameters
----------
path: str
path to the json file generated by LightLogger.save.
'''
with open(path, 'r') as f:
logger_import = json.load(f)
log_date = logger_import['log_dir'].replace(logger_import['result_dir']+'/', '')
Expand Down
43 changes: 43 additions & 0 deletions models.py → grnvae/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,49 @@ def forward(self, x):
return self.l3(out2)

class GRNVAE(nn.Module):
''' A GRN-VAE model
Parameters
----------
n_gene: int
Number of genes
hidden_dim: int
Size of dimension in the MLP layers
z_dim: int
Size of dimension of Z
A_dim: int
Number of adjacency matrices to be modeled at the same time
activation: function
A pytorch activation layer
train_on_non_zero: bool
Whether to train on non-zero data only
dropout_augmentation_p: float
Probability of augmented dropout. For example, 0.1 means that
10% of the data will be temporarily set to zero in each forward
pass
dropout_augmentation_type: str
Choose among 'all' (default), 'belowmean', 'belowhalfmean'. This
option specifies where dropout augmentation would happen. If
'belowmean' is selected, the augmentation would only happen on
values below global mean.
pretrained_A: torch.tensor
A customized initialization of A instead of random initialization.
Methods
-------
get_adj_
Obtain current adjacency matrix
get_adj
Obtain current adjacency matrix as a detached numpy array
I_minus_A
Calculate I - A
reparameterization(z_mu, z_sigma)
Reparameterization trick used in VAE
dropout_augmentation(x, global_mean)
Randomly add dropout noise to the original expression data
forward(x, global_mean, global_std, use_dropout_augmentation)
Forward pass
'''
def __init__(
self, n_gene, hidden_dim=128, z_dim=1, A_dim=1,
activation=nn.Tanh, train_on_non_zero=False,
Expand Down
8 changes: 5 additions & 3 deletions runner.py → grnvae/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,14 @@ def runGRNVAE(exp_array, configs,
logger: LightLogger or None
Either a predefined logger or None to start a new one. This
logger contains metric information logged during training.
progress_bar: bool
Whether to display a progress bar on epochs.
Returns
-------
torch.Module
A GRNVAE module object. You can export the adjacency matrix
using its get_adj() method.
(torch.nn.Module, list)
This function returns a tuple of the trained model and a list of
adjacency matrices, one for each evaluation point.
'''
if configs['early_stopping'] != 0 and configs['train_split'] == 1.0:
raise Exception(
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
33 changes: 33 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Packaging metadata for the grnvae distribution, built with the flit backend.
[build-system]
build-backend = "flit_core.buildapi"
# NOTE(review): the version is declared statically under [project], so
# setuptools_scm may not be needed at build time — confirm.
requires = ["flit_core >=3.4,<4", "setuptools_scm"]

# Core project metadata.
[project]
name = "grnvae"
authors = [
{ name = "Hao Zhu", email="haozhu233@gmail.com"},
{ name = "Donna Slonim", email="donna.slonim@tufts.edu"}
]
maintainers = [
{ name = "Hao Zhu", email = "haozhu233@gmail.com" }
]
# License text is read from the LICENSE file at the repository root.
license = {file = "LICENSE"}
description = "Improving GRN Inference using Dropout Augmentation"
version = "0.0.1"
requires-python = ">=3.7"
classifiers = ["License :: OSI Approved :: MIT License"]

# Runtime dependencies installed together with the package.
# NOTE(review): torch is not listed here although the package's model class
# subclasses nn.Module — confirm whether it is intentionally left for users
# to install themselves.
dependencies = [
"numpy>=1.16.5",
"pandas>=1.1.1",
"scipy>1.4",
"h5py>=3",
"natsort",
"packaging>=20",
"anndata",
"scikit-learn",
"networkx"
]

# NOTE(review): the repository name in this URL (grnkit) differs from the
# project name (grnvae) — confirm the link is correct.
[project.urls]
Home = "https://github.com/haozhu233/grnkit"

0 comments on commit af2ef5c

Please sign in to comment.