Skip to content

Commit

Permalink
fixes after check
Browse files Browse the repository at this point in the history
  • Loading branch information
saral98 committed Apr 23, 2024
1 parent f7656af commit 25ed97e
Show file tree
Hide file tree
Showing 20 changed files with 58,990 additions and 59,733 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,8 @@ inst/extdata/plots.R
inst/extdata/replacenawith0.txt
inst/extdata/testPrecomputedDataNA.R
vignettes/CENTRE_vignette.html
inst/extdata/PrecomputedDataAllTests.db
inst/extdata/PrecomputedDataLight.db
inst/extdata/createCombinedTestData.out
inst/extdata/example/HeLa-S3.rds

22 changes: 0 additions & 22 deletions CENTRE.Rproj

This file was deleted.

3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,13 @@ Imports:
RSQLite,
GenomicRanges,
IRanges,
regioneR,
utils,
stats
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
Depends:
R (>= 4.0)
R (>= 4.2.0)
Suggests:
knitr,
testthat (>= 3.0.0),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ importFrom(RSQLite,dbGetQuery)
importFrom(crupR,getEnhancers)
importFrom(crupR,normalize)
importFrom(metapod,combineParallelPValues)
importFrom(regioneR,extendRegions)
importFrom(stats,predict)
importFrom(stats,reshape)
importFrom(xgboost,xgb.DMatrix)
Expand Down
39 changes: 23 additions & 16 deletions R/centrePrediction.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,37 +24,43 @@
#' generic_features <- CENTRE::computeGenericFeatures(pairs)
#'
#' #Compute Cell-type features
#' files <- c(system.file("extdata/example","HeLa_H3K4me1.REF_chr19.bam", package = "CENTRE"),
#' system.file("extdata/example","HeLa_H3K4me3.REF_chr19.bam", package = "CENTRE"),
#' system.file("extdata/example","HeLa_H3K27ac.REF_chr19.bam", package = "CENTRE"))
#' files <- c(system.file("extdata/example","HeLa_H3K4me1.REF_chr19.bam",
#' package = "CENTRE"),
#' system.file("extdata/example","HeLa_H3K4me3.REF_chr19.bam",
#' package = "CENTRE"),
#' system.file("extdata/example","HeLa_H3K27ac.REF_chr19.bam",
#' package = "CENTRE"))
#' # Control ChIP-seq experiment to go with the rest of ChIP-seqs
#' inputs <- system.file("extdata/example", "HeLa_input.REF_chr19.bam", package = "CENTRE")
#' inputs <- system.file("extdata/example", "HeLa_input.REF_chr19.bam",
#' package = "CENTRE")
#' metaData <- data.frame(HM = c("H3K4me1", "H3K4me3", "H3K27ac"),
#' condition = c(1, 1, 1), replicate = c(1, 1, 1),
#' bamFile = files, inputFile = rep(inputs, 3))
#'tpmfile <- read.table(system.file("extdata/example", "HeLa-S3.tsv", package = "CENTRE"),
#' sep = "", stringsAsFactors = F, header = T)
#'tpmfile <- read.table(system.file("extdata/example", "HeLa-S3.tsv",
#' package = "CENTRE"),
#' sep = "", stringsAsFactors = FALSE, header = TRUE)
#'celltype_features <- CENTRE::computeCellTypeFeatures(metaData,
#' replicate = 1,
#' input.free = FALSE,
#' cores = 1,
#' sequencing = "single",
#' tpmfile = tpmfile,
#' featuresGeneric = generic_features)
#' pairs = pairs)

#'# Finally compute the predictions
#'predictions <- centrePrediction(celltype_features,
#'generic_features)
#'predictions <- centrePrediction(celltype_features, generic_features)
#' @export
#' @importFrom stats predict
#' @import utils
#' @importFrom xgboost xgb.load xgb.DMatrix
#'
centrePrediction <- function(features_celltype,features_generic, model = NULL){
centrePrediction <- function(features_celltype,
features_generic,
model = NULL) {
#Merge the generic features and the cell type features

start_time <- Sys.time()
features_generic$distance <- abs(features_generic$distance) # make distance absolute distance
features_generic$distance <- abs(features_generic$distance)
#generate the pair id to merge both feature sets
features_generic$pair <- paste(features_generic$enhancer_id,
features_generic$gene_id2,
Expand All @@ -74,8 +80,10 @@ centrePrediction <- function(features_celltype,features_generic, model = NULL){
features_generic, by.x = "pair", by.y = "pair")

##Loading the xgboost model
if (is.null(model)){
model <- system.file("extdata", "centre2_final_model.txt", package = "CENTRE")
if (is.null(model)) {
model <- system.file("extdata",
"centre2_final_model.txt",
package = "CENTRE")
} else {
check_file(model)
}
Expand All @@ -85,14 +93,13 @@ centrePrediction <- function(features_celltype,features_generic, model = NULL){
pairs <- features_all[, 1]
features_all <- features_all[, -1]

feature_matrix <- xgb.DMatrix(data.matrix(features_all))
#test <- xgboost::xgb.DMatrix(data = feature_matrix)
feature_matrix <- xgboost::xgb.DMatrix(data.matrix(features_all))
##Predicting
score <- predict(xgb_model, feature_matrix)
label <- as.numeric(score > 0.5)
#Add the gene and enhancer id's
predictions <- cbind(pairs, score, label)
predictions <- as.data.frame(predictions)
cat(paste0('time: ', format(Sys.time() - start_time), "\n"))
cat(paste0("time: ", format(Sys.time() - start_time), "\n"))
return(predictions)
}
38 changes: 22 additions & 16 deletions R/computeCellTypeFeatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
#'
#' @param metaData Dataframe indicating the paths to the ChIP-seq experiments.
#' More information on the format here `crupR::normalize
#' @param replicate: The number of replicates of the ChIP-seq experiments
#' @param replicate The number of replicates of the ChIP-seq experiments
#' that need to be normalized.
#' @param input.free: Boolean value indicating whether a Control/Input ChIP-seq
#' experiment is provided to go with the Histone Modification ChIP-seq experiments.
#' @param input.free Boolean value indicating whether a Control/Input ChIP-seq
#' experiment is provided to go with the Histone Modification ChIP-seq
#' experiments.
#' If the parameter is set to FALSE the normalization of ChIP-seq experiments
#' will be run in input.free mode.
#' @param cores Number of cores to compute the CRUP score features
Expand All @@ -28,10 +29,10 @@
#' @return
#' A table containting the following computed features :
#'* CRUP enhancer score for enhancer region, promoter region and the region
#'between the enhancer and the promoter
#'between the enhancer and the promoter.
#'* CRUP promoter score for enhancer region, promoter region and the region
#'between the enhancer and the promoter
#'* RNA-seq TPM values
#'between the enhancer and the promoter.
#'* TPM values from the RNA-seq experiment given.
#'
#'
#' @examples
Expand All @@ -46,23 +47,28 @@
#' generic_features <- CENTRE::computeGenericFeatures(pairs)
#'
#' #Compute Cell-type features
#' files <- c(system.file("extdata/example","HeLa_H3K4me1.REF_chr19.bam", package = "CENTRE"),
#' system.file("extdata/example","HeLa_H3K4me3.REF_chr19.bam", package = "CENTRE"),
#' system.file("extdata/example","HeLa_H3K27ac.REF_chr19.bam", package = "CENTRE"))
#' files <- c(system.file("extdata/example","HeLa_H3K4me1.REF_chr19.bam",
#' package = "CENTRE"),
#' system.file("extdata/example","HeLa_H3K4me3.REF_chr19.bam",
#' package = "CENTRE"),
#' system.file("extdata/example","HeLa_H3K27ac.REF_chr19.bam",
#' package = "CENTRE"))
#' # Control ChIP-seq experiment to go with the rest of ChIP-seqs
#' inputs <- system.file("extdata/example", "HeLa_input.REF_chr19.bam", package = "CENTRE")
#' inputs <- system.file("extdata/example", "HeLa_input.REF_chr19.bam",
#' package = "CENTRE")
#' metaData <- data.frame(HM = c("H3K4me1", "H3K4me3", "H3K27ac"),
#' condition = c(1, 1, 1), replicate = c(1, 1, 1),
#' bamFile = files, inputFile = rep(inputs, 3))
#'tpmfile <- read.table(system.file("extdata/example", "HeLa-S3.tsv", package = "CENTRE"),
#' sep = "", stringsAsFactors = F, header = T)
#'tpmfile <- read.table(system.file("extdata/example", "HeLa-S3.tsv",
#' package = "CENTRE"),
#' sep = "\t", stringsAsFactors = FALSE, header = TRUE)
#'celltype_features <- CENTRE::computeCellTypeFeatures(metaData,
#' replicate = 1,
#' input.free = FALSE,
#' cores = 1,
#' sequencing = "single",
#' tpmfile = tpmfile,
#' pairs = generic_features)
#' pairs = pairs)
#'
#'@export
#'@importFrom crupR normalize getEnhancers
Expand Down Expand Up @@ -96,6 +102,7 @@ computeCellTypeFeatures <- function(metaData,
crupScores <- crupR::getEnhancers(data = normalized, C = cores, all = TRUE)
crupScores <- crupScores$D
## check what parts of this are necessary
colnames(pairs) <- c("gene_id2", "enhancer_id")
listEnh <- as.data.frame(unique(pairs$enhancer_id))
colnames(listEnh) <- c("enhancer_id")
listProm <- as.data.frame(unique(pairs$gene_id2))
Expand Down Expand Up @@ -134,7 +141,7 @@ computeCellTypeFeatures <- function(metaData,
# Compute the promoter probability from probA and probE
# In CRUP probA is the probability of a region being an active reg. element
# probE is the probability of a region being an active enhancer
crupScores$probP <- crupScores$probA *(1 - crupScores$probE)
crupScores$probP <- crupScores$probA * (1 - crupScores$probE)

cat("Getting the CRUP-PP scores for enhancer")

Expand Down Expand Up @@ -170,7 +177,6 @@ computeCellTypeFeatures <- function(metaData,
features_table_all <- get_rnaseq(crupFeatures, tpmfile)

features_table_all[is.na(features_table_all)] <- 0

features_table_all <- features_table_all[, c("gene_id2",
"enhancer_id",
"EP_prob_enh.1",
Expand Down Expand Up @@ -199,7 +205,7 @@ computeCellTypeFeatures <- function(metaData,
"norm_reg_dist_prom",
"TPM")]

cat(paste0('time: ', format(Sys.time() - startTime), "\n"))
cat(paste0("time: ", format(Sys.time() - startTime), "\n"))
endPart()
return(features_table_all)

Expand Down
6 changes: 3 additions & 3 deletions R/computeGenericFeatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ computeGenericFeatures <- function(pairs) {
featuresDistances <- computeDistances(pairs)

cat("Removing pairs with distance over 500 Kb")
featuresDistances <- featuresDistances[abs(featuresDistances$distance)
featuresDistances <- featuresDistances[featuresDistances$distance
<= 500000, ]
endPart()

Expand All @@ -49,7 +49,7 @@ computeGenericFeatures <- function(pairs) {

conn <- RSQLite::dbConnect(RSQLite::SQLite(),
system.file("extdata",
"PrecomputedData2.db",
"PrecomputedDataLight.db",
package = "CENTRE"))

combinedTestDf <- getPrecomputedValues("combinedTestData",
Expand All @@ -71,6 +71,6 @@ computeGenericFeatures <- function(pairs) {
"combined_tests")]

featuresGeneric[is.na(featuresGeneric)] <- 0 ##NA values
cat(paste0('time: ', format(Sys.time() - startTime), "\n"))
cat(paste0("time: ", format(Sys.time() - startTime), "\n"))
return(featuresGeneric)
}
14 changes: 7 additions & 7 deletions R/createPairs.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ createPairs <- function(gene) {
conn <- RSQLite::dbConnect(RSQLite::SQLite(),
system.file("extdata",
"Annotation.db",
package = "CENTRE"))
package = "CENTRE"))
#get chromosome and tts of our genes

query <- paste("SELECT gene_id1, chr, transcription_start FROM gencode WHERE gene_id1 in (",
paste0(sprintf("'%s'", gene$gene_id1), collapse = ", "),")",sep="" )
paste0(sprintf("'%s'", gene$gene_id1), collapse = ", "), ")", sep = "")
gene <- RSQLite::dbGetQuery(conn, query)

#Select all of the annotation for ccres v3
Expand All @@ -54,10 +54,10 @@ createPairs <- function(gene) {

#extend the gene region 500Kb to the left of TTS and to the right
genesRange <- regioneR::extendRegions(genesRange,
extend.start=500000,
extend.end=500000)
extend.start = 500000,
extend.end = 500000)

enhancerRange<- with(ccresEnhancer,
enhancerRange <- with(ccresEnhancer,
GenomicRanges::GRanges(V1,
IRanges::IRanges(start = new_start,
end = new_end)))
Expand All @@ -67,7 +67,7 @@ createPairs <- function(gene) {
overlaps <- GenomicRanges::findOverlaps(genesRange, enhancerRange,
ignore.strand = TRUE)

ccresOverlapping <-data.frame(gene = overlaps@from, enhancer = overlaps@to)
ccresOverlapping <- data.frame(gene = overlaps@from, enhancer = overlaps@to)
ccresOverlapping$gene_id1 <- gene$gene_id1[ccresOverlapping$gene]
ccresOverlapping$enhancer_id <- ccresEnhancer$V5[ccresOverlapping$enhancer]

Expand All @@ -76,6 +76,6 @@ createPairs <- function(gene) {

### add a function to exclude any pairs that are not in the same chromosome

cat(paste0('time: ', format(Sys.time() - startTime), "\n"))
cat(paste0("time: ", format(Sys.time() - startTime), "\n"))
return(ccresOverlapping)
}
63 changes: 21 additions & 42 deletions R/downloadPrecomputedData.R
Original file line number Diff line number Diff line change
@@ -1,53 +1,32 @@
#' Download PrecomputedData.db
#'
#' Downloads the PrecomputedData.db database that is needed for the
#' Download PrecomputedDataLight.db
#'
#' Downloads the PrecomputedDataLight.db database that is needed for the
#' computeGenericFeatures() function. This databased contains the values
#' of the Wilcoxon Rank tests and the CRUP correlations.
#'
#'
#' @param method Method to be used for downloading files. Check download.file()
#' manuals to see the current available methods
#' @return Integer code 0 if download was succesful and the file was saved in the
#' correct directory. In the case of failure errors will be raised.
#' manuals to see the current available methods
#' @param all Boolean value. When set to true, the PrecomputedDataAllTests.db
#' database will be downloaded too. This database holds all of the p-values
#' which are combined in "combined_tests" column.
#' @return Integer code 0 if download was succesful and the file was saved in
#' the correct directory. In the case of failure errors will be raised.
#' @examples
#' # methods accepts "internal", "wininet" (Windows only) ,"wget", "curl" etc.
#' # Assuming the user has wget available
#' downloadPrecomputedData(method="wget")
#' @export
#' @import utils

downloadPrecomputedData <- function(method) {
start_time <- Sys.time()
url <- "http://owww.molgen.mpg.de/~CENTRE_data/PrecomputedData.db"
cat("Downloading PrecomputedData.db\n")
exit <- download.file(url,
destfile=paste(system.file("extdata",package = "CENTRE")
, "PrecomputedData.db",
sep = "/") ,
method = method)
if (exit != 0 ) {
stop("Download of PrecomputedData.db failed. Non-zero exit status.")
}

f <- system.file("extdata","PrecomputedData.db", package = "CENTRE")
if (!file.exists(f)){
stop("Download of of PrecomputedData.db failed or file was saved in the wrong directory.")
}
##Download sysdata.rda
url_sys <- "http://owww.molgen.mpg.de/~CENTRE_data/Annotation.db"
cat("Downloading sysdata.rda\n")
exit_sys <- download.file(url_sys,
destfile=paste(system.file("extdata",
package="CENTRE"), "Annotation.db", sep = "/"),
method = method)
if (exit_sys != 0 ) {
stop("Download Annotation.db failed. Non-zero exit status.")
}

fs <- system.file("extdata","PrecomputedData.db", package = "CENTRE")
if (!file.exists(fs)){
stop("Download Annotation.db failed or file was saved in the wrong directory.")
}

cat(paste0('time: ', format(Sys.time() - start_time), "\n"))
return(0)
downloadPrecomputedData <- function(method, all = FALSE) {
start_time <- Sys.time()
#Download PrecomputedDataLight.db
downloader("PrecomputedDataLight.db", method)
#Download Annotation.db
downloader("Annotation.db", method)
if (all == TRUE) {
downloader("PrecomputedAllTests.db", method)
}
cat(paste0("time: ", format(Sys.time() - start_time), "\n"))
return(0)
}
Loading

0 comments on commit 25ed97e

Please sign in to comment.