Skip to content

Commit

Permalink
add lmi_multiple_run_cpp function
Browse files Browse the repository at this point in the history
  • Loading branch information
Xiaojieqiu committed Sep 17, 2017
1 parent 4130f2b commit c04e101
Show file tree
Hide file tree
Showing 14 changed files with 315 additions and 91 deletions.
37 changes: 31 additions & 6 deletions Scribe/R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,31 @@ lmi_single_run <- function(x, y, delay) {
.Call('_Scribe_lmi_single_run', PACKAGE = 'Scribe', x, y, delay)
}

lmi_multiple_run_cpp <- function(x, y, d = 1L, run_vec = 0L) {
.Call('_Scribe_lmi_multiple_run_cpp', PACKAGE = 'Scribe', x, y, d, run_vec)
}

#' @title
#' lmi_multiple_run
#' @description
#' This subroutine calculates the lagged mutual information (multiple runs)
#'
#' @param x one random variable from the time-series data
#'
#' @param y another random variable from the time-series data
#'
#' @param delay Time lags used to estimate the RDI values
#'
#' @details
#' \code{cmi} takes two random variable x and y and estimated their mutual information conditioned on the third random variable z
#' using the KSG estimator.
#' It relies on the ANN package to query the kNN with KDTree algorithm.
#' @return a numeric value for the condition mutual information estimator between two variables (x, y) conditioned on variable z
#' @export
lmi_multiple_run <- function(x, y, d, run_vec) {
.Call('_Scribe_lmi_multiple_run', PACKAGE = 'Scribe', x, y, d, run_vec)
}

#' @title
#' This function simulates the CONDITIONED DIRECTED mutual information from X to Y CONDITIONED ON Z when you have a SINGLE run of the processes
#' @description
Expand Down Expand Up @@ -135,7 +160,7 @@ calculate_rdi_cpp_wrap <- function(expr_data, delays, super_graph, turning_point
.Call('_Scribe_calculate_rdi_cpp_wrap', PACKAGE = 'Scribe', expr_data, delays, super_graph, turning_points, method)
}

extract_top_incoming_nodes_delays <- function(max_rdi_value, max_rdi_delays, k) {
extract_top_incoming_nodes_delays <- function(max_rdi_value, max_rdi_delays, k = 5L) {
.Call('_Scribe_extract_top_incoming_nodes_delays', PACKAGE = 'Scribe', max_rdi_value, max_rdi_delays, k)
}

Expand All @@ -158,23 +183,23 @@ calculate_conditioned_rdi_cpp_wrap <- function(expr_data, super_graph, max_rdi_v
#' It's implimented in C++, providing a (small) increase in speed over the R equivalent.
#' @return a updated matrix with gene expression smoothed with window size equal to window_size
#' @export
smooth_gene <- function(expr_data, window_size) {
smooth_gene <- function(expr_data, window_size = 40L) {
.Call('_Scribe_smooth_gene', PACKAGE = 'Scribe', expr_data, window_size)
}

rdi_single_run_multiple_run_cpp <- function(x, y, d, run_vec) {
rdi_single_run_multiple_run_cpp <- function(x, y, d = 1L, run_vec = 0L) {
.Call('_Scribe_rdi_single_run_multiple_run_cpp', PACKAGE = 'Scribe', x, y, d, run_vec)
}

calculate_rdi_multiple_run_cpp <- function(expr_data, delays, run_vec, super_graph, turning_points, method) {
calculate_rdi_multiple_run_cpp <- function(expr_data, delays, run_vec, super_graph, turning_points = 0L, method = 1L) {
.Call('_Scribe_calculate_rdi_multiple_run_cpp', PACKAGE = 'Scribe', expr_data, delays, run_vec, super_graph, turning_points, method)
}

calculate_rdi_multiple_run_cpp_wrap <- function(expr_data, delays, run_vec, super_graph, turning_points, method) {
.Call('_Scribe_calculate_rdi_multiple_run_cpp_wrap', PACKAGE = 'Scribe', expr_data, delays, run_vec, super_graph, turning_points, method)
}

rdi_multiple_runs_conditioned_cpp <- function(x, y, z, z_delays, d, run_vec) {
rdi_multiple_runs_conditioned_cpp <- function(x, y, z, z_delays, d = 1L, run_vec = 0L) {
.Call('_Scribe_rdi_multiple_runs_conditioned_cpp', PACKAGE = 'Scribe', x, y, z, z_delays, d, run_vec)
}

Expand All @@ -201,7 +226,7 @@ rdi_multiple_runs_conditioned <- function(x, y, z, z_delays, d, run_vec) {
.Call('_Scribe_rdi_multiple_runs_conditioned', PACKAGE = 'Scribe', x, y, z, z_delays, d, run_vec)
}

calculate_multiple_run_conditioned_rdi_cpp <- function(expr_data, super_graph, max_rdi_value, max_rdi_delays, run_vec, k) {
calculate_multiple_run_conditioned_rdi_cpp <- function(expr_data, super_graph, max_rdi_value, max_rdi_delays, run_vec = 0L, k = 5L) {
.Call('_Scribe_calculate_multiple_run_conditioned_rdi_cpp', PACKAGE = 'Scribe', expr_data, super_graph, max_rdi_value, max_rdi_delays, run_vec, k)
}

Expand Down
96 changes: 57 additions & 39 deletions Scribe/R/plotting.R
Original file line number Diff line number Diff line change
Expand Up @@ -715,7 +715,8 @@ plot_rdi_pairs <- function(cds_subset, gene_pairs_mat,
scales = "free",
verbose = FALSE) {

all_genes_in_pair <- as.vector(as.matrix(gene_pairs_mat))
gene_pairs_mat <- as.matrix(gene_pairs_mat)
all_genes_in_pair <- as.vector(gene_pairs_mat)
if(! all(all_genes_in_pair %in% fData(cds_subset)$gene_short_name)) {
stop("cds_subset doesn't include all genes in gene_pairs_mat Make sure all genes are included in gene_short_name column of the cds")
}
Expand Down Expand Up @@ -777,33 +778,34 @@ plot_rdi_pairs <- function(cds_subset, gene_pairs_mat,
#
########################################################################################################################################################################
if(conditioning == T) {
# # do a linear line fitting
# # do a linear line fitting
# df <- data.frame(y = y, z = z)
# full_model_fit <- VGAM::vglm(as.formula("y~z"), data = df, family=gaussianff())
#
# y <- resid(full_model_fit)
data <- Reduce(cbind, list(x, y, z))

xyz_kde <- kde(data, gridsize = c(25,25, 25))

x_meshgrid <- xyz_kde$eval.points[[1]]
y_meshgrid <- xyz_kde$eval.points[[2]]
z_meshgrid <- xyz_kde$eval.points[[3]]

den_res <- matrix(0, nrow = length(x_meshgrid), ncol = length(y_meshgrid))

nConBins <- length(z_meshgrid)
for(conBins in 1:nConBins) {
den_res_tmp <- xyz_kde$estimate[, , conBins]
den_res_tmp[!is.finite(den_res_tmp)] <- 0
den_res <- den_res + den_res_tmp / (nConBins * sum(den_res_tmp))
}

den_x <- colSums(den_res) # just calculate the sum for each column
# data <- Reduce(cbind, list(x, y, z))
#
# xyz_kde <- kde(data, gridsize = c(25,25, 25))
#
# x_meshgrid <- xyz_kde$eval.points[[1]]
# y_meshgrid <- xyz_kde$eval.points[[2]]
# z_meshgrid <- xyz_kde$eval.points[[3]]
#
# den_res <- matrix(0, nrow = length(x_meshgrid), ncol = length(y_meshgrid))
#
# nConBins <- length(z_meshgrid)
# for(conBins in 1:nConBins) {
# den_res_tmp <- xyz_kde$estimate[, , conBins]
# den_res_tmp[!is.finite(den_res_tmp)] <- 0
# den_res <- den_res + den_res_tmp / (nConBins * sum(den_res_tmp))
# }
#
# den_x <- colSums(den_res) # just calculate the sum for each column
}
########################################################################################################################################################################
if(!conditioning) {
rng_vec <- quantile(z, seq(0, 1, length.out = nConBins + 1))
if(conditioning) {
rng_vec <- sort(z)[quantile(1:length(z), seq(0, 1, length.out = nConBins + 1))] #ensure each bin gets the same number of cells
den_res_array <- array(0, dim = c(dim_val, dim_val, nConBins))
den_res <- matrix(0, nrow = dim_val, ncol = dim_val)

Expand Down Expand Up @@ -837,33 +839,49 @@ plot_rdi_pairs <- function(cds_subset, gene_pairs_mat,
# message('x_meshgrid is ', x_meshgrid)
# message('y_meshgrid is ', y_meshgrid)
}
max_ind <- 0
tmp <- 0
for(conBins in 1:(nConBins - 1)) {
den_res_tmp <- den_res_array[, , conBins]
den_res_tmp[!is.finite(den_res_tmp)] <- 0
den_res <- den_res + den_res_tmp / (nConBins * sum(den_res_tmp))
if(tmp < sum(den_res_array[, , conBins]))
max_ind <- conBins

tmp <- sum(den_res_array[, , conBins])
# den_res_tmp <- den_res_array[, , conBins]
# den_res_tmp[!is.finite(den_res_tmp)] <- 0
# den_res <- den_res + den_res_tmp / nConBins
# den_res <- den_res + den_res_tmp / (nConBins * sum(den_res_tmp))
}
den_res <- den_res_array[, , max_ind]

den_x <- rowSums(den_res) # just calculate the sum for each column
} else {
# bandwidth <- c(MASS::bandwidth.nrd(x), MASS::bandwidth.nrd(y))
# if(any(bandwidth == 0)) {
# max_vec <- c(max(x), max(y))
# bandwidth[bandwidth == 0] <- max_vec[bandwidth == 0] / dim_val
# }
# den_xy <- MASS::kde2d(x, y, n = c(dim_val, dim_val), lims = c(min(x), max(x), min(y), max(y)), h = bandwidth)
# den_res <- as.data.frame(den_xy$z)
# # dimnames(den_res) <- list(paste0("x_", as.character(den_xy$x)), paste0("y_", as.character(den_xy$y)))
# # den_x_res <- density(x, n = round(length(x))/ 4, from = min(x), to = max(x))
# # den_x <- den_x_res$y
# den_x <- rowSums(den_res) # just calculate the sum for each column
#
# x_meshgrid <- den_xy$x
# y_meshgrid <- den_xy$y
bandwidth <- c(MASS::bandwidth.nrd(x), MASS::bandwidth.nrd(y))
if(any(bandwidth == 0)) {
max_vec <- c(max(x), max(y))
bandwidth[bandwidth == 0] <- max_vec[bandwidth == 0] / dim_val
}
den_xy <- MASS::kde2d(x, y, n = c(dim_val, dim_val), lims = c(min(x), max(x), min(y), max(y)), h = bandwidth)
den_res <- as.data.frame(den_xy$z)
# dimnames(den_res) <- list(paste0("x_", as.character(den_xy$x)), paste0("y_", as.character(den_xy$y)))
# den_x_res <- density(x, n = round(length(x))/ 4, from = min(x), to = max(x))
# den_x <- den_x_res$y
den_x <- rowSums(den_res) # just calculate the sum for each column

x_meshgrid <- den_xy$x
y_meshgrid <- den_xy$y
}

max_ind <- 1

for(i in 1:length(x_meshgrid)) {
max_val <- max(den_res[i, ] / den_x[i]); min_val <- 0 #min(den_res[i, ] / den_x[i]) #
max_ind <- which.max(den_res[i, ] / den_x[i])

print(den_x)
if(den_x[i] == 0) {
}
else {
max_ind <- which.max(den_res[i, ] / den_x[i])
}
# message("i is ", i, "j is ", j, "vector is ", c(x_meshgrid[i], y_meshgrid[max_ind], gene_pair_name))
ridge_curve[i + r_ini_ind, ] <- c(x_meshgrid[i], y_meshgrid[max_ind], gene_pair_name)

Expand Down
17 changes: 9 additions & 8 deletions Scribe/inst/include/accessary_code.h
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
#include <Rcpp.h>
using namespace Rcpp;

double di_single_run_cpp(NumericMatrix& x, NumericMatrix& y, int n = 10);
double di_single_run_conditioned_cpp(NumericMatrix x, NumericMatrix y, NumericMatrix& z, int n = 10);
double di_single_run_cpp(NumericMatrix& x, NumericMatrix& y, int n = 5);
double di_single_run_conditioned_cpp(NumericMatrix x, NumericMatrix y, NumericMatrix& z, int n = 5);
double rdi_many_runs_cpp(NumericMatrix& x, NumericMatrix& y);
double rdi_single_run_cpp(NumericMatrix& x, NumericMatrix& y, int d = 1);
double rdi_single_run_conditioned_cpp(NumericMatrix& x, NumericMatrix& y, NumericMatrix& z, NumericVector& z_delays, int d = 1);
List calculate_rdi_cpp(NumericMatrix& expr_data, IntegerVector delays, IntegerMatrix& super_graph, IntegerVector& turning_points, int method = 1); // NULL
double lmi_multiple_run(NumericMatrix& x, NumericMatrix& y, int d = 1, IntegerVector run_vec = 0);
List calculate_rdi_cpp(NumericMatrix& expr_data, IntegerVector delays = 1, IntegerMatrix& super_graph, IntegerVector& turning_points = 0, int method = 1); // NULL
NumericMatrix calculate_conditioned_rdi_cpp(NumericMatrix& expr_data, IntegerMatrix& super_graph,
NumericMatrix& max_rdi_value, IntegerMatrix& max_rdi_delays, int k);
NumericMatrix& max_rdi_value, IntegerMatrix& max_rdi_delays, int k = 5);
NumericMatrix smooth_gene(NumericMatrix expr_data, const int window_size = 40);
double rdi_single_run_multiple_run_cpp(NumericMatrix& x, NumericMatrix& y, int d, IntegerVector run_vec);
List calculate_rdi_multiple_run_cpp(NumericMatrix& expr_data, IntegerVector delays, IntegerVector run_vec, IntegerMatrix& super_graph, IntegerVector turning_points, int method); // = NULL
NumericMatrix rdi_multiple_runs_conditioned_cpp(NumericMatrix& x, NumericMatrix& y, NumericMatrix& z, IntegerVector& z_delays, int d, IntegerVector run_vec);
double rdi_single_run_multiple_run_cpp(NumericMatrix& x, NumericMatrix& y, int d = 1, IntegerVector run_vec = 0);
List calculate_rdi_multiple_run_cpp(NumericMatrix& expr_data, IntegerVector delays = 1, IntegerVector run_vec = 0, IntegerMatrix& super_graph, IntegerVector turning_points = 0, int method = 1); // = NULL
NumericMatrix rdi_multiple_runs_conditioned_cpp(NumericMatrix& x, NumericMatrix& y, NumericMatrix& z, IntegerVector& z_delays, int d = 1, IntegerVector run_vec = 0);
NumericMatrix calculate_multiple_run_conditioned_rdi_cpp(NumericMatrix& expr_data, IntegerMatrix& super_graph,
NumericMatrix& max_rdi_value, IntegerMatrix& max_rdi_delays, IntegerVector run_vec, int k)
NumericMatrix& max_rdi_value, IntegerMatrix& max_rdi_delays, IntegerVector run_vec, int k = 5)
30 changes: 30 additions & 0 deletions Scribe/src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,34 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// lmi_multiple_run_cpp
double lmi_multiple_run_cpp(NumericMatrix& x, NumericMatrix& y, int d, IntegerVector run_vec);
RcppExport SEXP _Scribe_lmi_multiple_run_cpp(SEXP xSEXP, SEXP ySEXP, SEXP dSEXP, SEXP run_vecSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< NumericMatrix& >::type x(xSEXP);
Rcpp::traits::input_parameter< NumericMatrix& >::type y(ySEXP);
Rcpp::traits::input_parameter< int >::type d(dSEXP);
Rcpp::traits::input_parameter< IntegerVector >::type run_vec(run_vecSEXP);
rcpp_result_gen = Rcpp::wrap(lmi_multiple_run_cpp(x, y, d, run_vec));
return rcpp_result_gen;
END_RCPP
}
// lmi_multiple_run
double lmi_multiple_run(SEXP x, SEXP y, SEXP d, IntegerVector run_vec);
RcppExport SEXP _Scribe_lmi_multiple_run(SEXP xSEXP, SEXP ySEXP, SEXP dSEXP, SEXP run_vecSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< SEXP >::type x(xSEXP);
Rcpp::traits::input_parameter< SEXP >::type y(ySEXP);
Rcpp::traits::input_parameter< SEXP >::type d(dSEXP);
Rcpp::traits::input_parameter< IntegerVector >::type run_vec(run_vecSEXP);
rcpp_result_gen = Rcpp::wrap(lmi_multiple_run(x, y, d, run_vec));
return rcpp_result_gen;
END_RCPP
}
// rdi_single_run_conditioned
double rdi_single_run_conditioned(SEXP x, SEXP y, SEXP z, SEXP z_delays, SEXP d);
RcppExport SEXP _Scribe_rdi_single_run_conditioned(SEXP xSEXP, SEXP ySEXP, SEXP zSEXP, SEXP z_delaysSEXP, SEXP dSEXP) {
Expand Down Expand Up @@ -384,6 +412,8 @@ static const R_CallMethodDef CallEntries[] = {
{"_Scribe_rdi_many_runs", (DL_FUNC) &_Scribe_rdi_many_runs, 2},
{"_Scribe_rdi_single_run", (DL_FUNC) &_Scribe_rdi_single_run, 3},
{"_Scribe_lmi_single_run", (DL_FUNC) &_Scribe_lmi_single_run, 3},
{"_Scribe_lmi_multiple_run_cpp", (DL_FUNC) &_Scribe_lmi_multiple_run_cpp, 4},
{"_Scribe_lmi_multiple_run", (DL_FUNC) &_Scribe_lmi_multiple_run, 4},
{"_Scribe_rdi_single_run_conditioned", (DL_FUNC) &_Scribe_rdi_single_run_conditioned, 5},
{"_Scribe_extract_max_rdi_value_delay", (DL_FUNC) &_Scribe_extract_max_rdi_value_delay, 2},
{"_Scribe_calculate_rdi_cpp", (DL_FUNC) &_Scribe_calculate_rdi_cpp, 5},
Expand Down
Binary file modified Scribe/src/RcppExports.o
Binary file not shown.
Binary file modified Scribe/src/Scribe.so
Binary file not shown.
Loading

0 comments on commit c04e101

Please sign in to comment.