-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added createPairs, computefeaturesGeneric and computeFeaturesCellTYpe
- Loading branch information
saral98
committed
Jul 11, 2022
1 parent
50a6906
commit 8f04d83
Showing
17 changed files
with
667 additions
and
368 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(compute_features) | ||
export(computeCellTypeFeatures) | ||
export(createPairs) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
centrePrediction <- function(features_generic, features_celltype, model){ | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
#' Compute cell type specific features | ||
#' | ||
#' Computes the cell type specific features needed for the CENTRE classification | ||
#' step. | ||
#' | ||
#' @param file Path to a file with gene and enhancer pairs. | ||
#' @param metaData Dataframe indicating the paths to the ChIP-seq experiments. | ||
#' More information on the format here `crupR::normalize()` | ||
#' @param cores Number of cores to compute the CRUP score features | ||
#' @param tpmpath Path to a file with the RNA-seq TPM values, with the names of | ||
#' the genes given as ENSEMBLE ID's | ||
#' | ||
#' @return | ||
#' A table containting the following computed features : | ||
#'* CRUP enhancer score for enhancer region, promoter region and the region between the enhancer and the promoter | ||
#'* CRUP promoter score for enhancer region, promoter region and the region between the enhancer and the promoter | ||
#'* RNA-seq TPM values | ||
#' | ||
#' | ||
#' @export | ||
#' | ||
#' @examples | ||
#'files <- c( | ||
#'"/project/CRUP_scores/CRUP_scores/ENCODE/Single_ended/Tissues/body_pancreas/H3K4me1/H3K4me1.bam", | ||
#'"/project/CRUP_scores/CRUP_scores/ENCODE/Single_ended/Tissues/body_pancreas/H3K4me3/H3K4me3.bam", | ||
#'"/project/CRUP_scores/CRUP_scores/ENCODE/Single_ended/Tissues/body_pancreas/H3K27ac/H3K27ac.bam" ) | ||
#' | ||
#'features_generics <- "/project/CRUP_scores/CRUP_scores/ENCODE/Single_ended/Tissues/body_pancreas/Controls/features_generic.bam" | ||
#' | ||
#'metaData <- data.frame(HM = c("H3K4me1","H3K4me3","H3K27ac"), | ||
#' condition = c(1,1,1), replicate = c(1,1,1), | ||
#' bamFile = files, features_genericFile = rep(features_generics,3)) | ||
#'file <- "/project/CRUP_scores/EPI/example1.txt" | ||
#'compute_features(file, metaData, cores) | ||
#' | ||
#' | ||
computeCellTypeFeatures <- function(metaData, cores, sequencing, tpmpath, | ||
features_generic){ | ||
## Pre-eliminary checks and computations | ||
check_file(tpmpath) | ||
tpmfile <- read.table(tpmpath, sep = "", stringsAsFactors = F, header = T) | ||
|
||
|
||
## Computing the crup scores | ||
startPart("Computing CRUP score features") | ||
|
||
## Calling normalization step only on the chromosomes we have | ||
chr <- unique(features_generic$chr) | ||
print(head(features_generic)) | ||
normalized <- crupR::normalize(metaData = metaData, | ||
condition = 1, | ||
replicate = 1, | ||
genome = "hg38", | ||
sequencing = "single", | ||
chroms = chr, | ||
cores = cores) | ||
#Get CRUP enhancer probabilities | ||
crup_scores_enh <- crupR::getEnhancers(data = normalized, cores = cores) | ||
crup_scores_enh <- crup_scores_enh$data_matrix | ||
|
||
|
||
##Making the ranges for the enhancers | ||
list_enh <- as.data.frame(unique(features_generic$enhancer_id)) | ||
colnames(list_enh) <- c("enhancer_id") | ||
|
||
regions_enhancer <- merge(list_enh, | ||
ccres_enhancer[,c('V1', 'V5', 'new_start', 'new_end')], | ||
by.x='enhancer_id', | ||
by.y='V5') | ||
|
||
|
||
##Making the ranges for the genes | ||
|
||
list_prom <- as.data.frame(unique(features_generic$gene_id2)) | ||
colnames(list_prom) <- c("gene_id2") | ||
regions_prom <- merge(list_prom, | ||
gencode[,c('chr','gene_id1','new_start', 'new_end')], | ||
by.x='gene_id2', | ||
by.y='gene_id1') | ||
|
||
|
||
cat("Getting the CRUP-EP scores for enhancer, promoter and the regulatory | ||
distance") | ||
|
||
crup_EP_enh <- compute_crup_enhancer(regions_enhancer, list_enh, crup_scores_enh) | ||
|
||
crup_features <-merge(features_generic, | ||
crup_EP_enh, | ||
by.x="enhancer_id", | ||
by.y="cres_name", | ||
all.x=TRUE) | ||
|
||
crup_EP_prom <- compute_crup_promoter(regions_prom,list_prom, crup_scores_enh) | ||
|
||
crup_features <-merge(crup_features, | ||
crup_EP_prom, | ||
by.x="gene_id2", | ||
by.y="gene_name", | ||
all.x=TRUE) | ||
|
||
crup_features <- compute_crup_reg_distance(crup_features, crup_scores_enh) | ||
|
||
|
||
##Get CRUP promoter probabilities | ||
crup_scores_prom <- crupR::getEnhancers(data = normalized, cores = cores, promprob = T) | ||
crup_scores_prom <- crup_scores_prom$data_matrix | ||
|
||
cat("Getting the CRUP-PP scores for enhancer") | ||
|
||
|
||
|
||
crup_PP_enh <- compute_crup_enhancer(regions_enhancer, list_enh, crup_scores_prom) | ||
|
||
crup_features <-merge(crup_features, | ||
crup_PP_enh, | ||
by.x="enhancer_id", | ||
by.y="cres_name", | ||
all.x=TRUE) | ||
|
||
crup_PP_prom <- compute_crup_promoter(regions_prom, list_prom, crup_scores_prom) | ||
crup_features <-merge(crup_features, | ||
crup_PP_prom, | ||
by.x="gene_id2", | ||
by.y="gene_name", | ||
all.x=TRUE) | ||
|
||
crup_features <- compute_crup_reg_distance(crup_features, crup_scores_prom) | ||
|
||
|
||
endPart() | ||
|
||
startPart("Getting the TPM values") | ||
features_table_all <- get_rnaseq(crup_features, tpmfile) | ||
|
||
|
||
features_table_all[is.na(features_table_all)] <- 0 | ||
|
||
endPart() | ||
return(features_table_all) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
#' Compute generic features | ||
#' | ||
#' Computes the generic features needed by CENTRE to make predictions | ||
#' | ||
#' @param file Path to a file with either genes and enhancer pairs. | ||
#' @param metaData Dataframe indicating the paths to the ChIP-seq experiments. | ||
#' More information on the format here `crupR::normalize()` | ||
#' | ||
#' | ||
#' @return | ||
#' A table containting the following computed features : | ||
#'* distance: Distance between gene and enhancer | ||
#'* combined_tests: Combined value of the Wilcoxon tests (CAGE, DNase | ||
#'expression, CRUP expression and DNase DNase) | ||
#'* crup_COR: CRUP correlation scores | ||
#' | ||
#' @examples | ||
#' | ||
#' | ||
#' | ||
computeGenericFeatures <- function(file){ | ||
## Pre-eliminary checks and computations | ||
|
||
check_file(file) | ||
x <- read.table(file, sep = "\t", stringsAsFactors = F) | ||
|
||
x$gene_id1 <- gsub("\\..*","",x[,1]) | ||
|
||
## Computing the distance features | ||
|
||
startPart("Computing distance features") | ||
|
||
|
||
colnames(x) <- c("gene_id", "enhancer_id", "gene_id2") | ||
features_distances <- distances_gene_enhancer(x) | ||
|
||
cat("Removing pairs with distance over 500 Kb") | ||
features_distances <- features_distances[abs(features_distances$distance) | ||
<= 500000,] | ||
endPart() | ||
|
||
## Getting the values for the Wilcoxon tests and the CRUP correlations | ||
startPart("Get Wilcoxon tests and CRUP correlations") | ||
|
||
features_distances$pair <- paste(features_distances$enhancer_id, | ||
features_distances$gene_id2, | ||
sep = "_") | ||
wilcoxon_features <- wilcoxon_test_crup_cor(features_distances) | ||
|
||
## Combining the values of the Wilcoxon tests | ||
|
||
wilcoxon_features$combined_tests <- scran::combinePValues( | ||
wilcoxon_features$cage_wilcoxon_test, | ||
wilcoxon_features$dhsexp_wilcoxon_test, | ||
wilcoxon_features$crupexp_wilcoxon_test, | ||
wilcoxon_features$dhsdhs_wilcoxon_test, | ||
method="fisher") | ||
|
||
wilcoxon_features$combined_tests <-log(wilcoxon_features$combined_tests) | ||
|
||
## Return the table of features | ||
features_table <- wilcoxon_features[,c("gene_id2", "enhancer_id", "chr", | ||
"middle_point", "transcription_start", | ||
"distance","cor_CRUP", "combined_tests")] | ||
|
||
features_table[is.na(features_table)] <- 0 | ||
|
||
return(features_table) | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.