Skip to content

Commit

Permalink
Thank you @hpages for the fixes on BC devel
Browse files Browse the repository at this point in the history
Merge branch 'master' of git.bioconductor.org:packages/methrix

# Conflicts:
#	DESCRIPTION
  • Loading branch information
PoisonAlien committed May 4, 2021
2 parents b6be3f3 + 93d4504 commit 219929f
Show file tree
Hide file tree
Showing 15 changed files with 56 additions and 56 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: methrix
Title: Fast and efficient summarization of generic bedGraph files from Bisufite sequencing
Version: 1.4.07
Version: 1.5.1
Authors@R: c(
person("Anand","Mayakonda", role = c("aut", "cre"), email = "anand_mt@hotmail.com", comment = c(ORCID = "0000-0003-1162-687X")),
person("Reka","Toth", role = "aut", email = "r.toth@dkfz.de", comment = c(ORCID = "0000-0002-6096-1052")),
Expand Down
2 changes: 1 addition & 1 deletion R/accessory_funcs.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Tiny function to check if object is h5
is_h5 = function(m) {
return(m@metadata$is_h5)
return(metadata(m)$is_h5)
}

get_source_idx = function(protocol = NULL) {
Expand Down
2 changes: 1 addition & 1 deletion R/methrix_object.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ setMethod(f = "show", signature = "methrix", definition = function(object) {
cat(paste0(" n_CpGs: ", format(nrow(object), big.mark = ","), "\n"))
cat(paste0("n_samples: ", ncol(object), "\n"))
cat(paste0(" is_h5: ", is_h5(object), "\n"))
cat(paste0("Reference: ", object@metadata$genome, "\n"))
cat(paste0("Reference: ", metadata(object)$genome, "\n"))
})

# Create methrix obj
Expand Down
50 changes: 25 additions & 25 deletions R/methrix_operations.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @param overlap_type defines the type of the overlap of the CpG sites with the target region. Default value is `within`. For detailed description,
#' see the \code{findOverlaps} function of the \code{\link{IRanges}} package.
#' @param na_rm Remove NA's? Default \code{TRUE}
#' @param elementMetadata.col columns in \code{\link{methrix}}@elementMetadata which needs to be summarised. Default = NULL.
#' @param elementMetadata.col columns in \code{rowData(\link{methrix})} which needs to be summarised. Default = NULL.
#' @param n_chunks Number of chunks to split the \code{\link{methrix}} object in case it is very large. Default = 1.
#' @param n_cores Number of parallel instances. \code{n_cores} should be less than or equal to \code{n_chunks}. If \code{n_chunks} is not specified, then \code{n_chunks} is initialized to be equal to \code{n_cores}. Default = 1.
#' @param verbose Default TRUE
Expand Down Expand Up @@ -41,7 +41,7 @@ get_region_summary = function(m, regions = NULL, type = "M", how = "mean", overl

target_regions = (regions)
#Add a unique id for every target range (i.e, rows)
target_regions@elementMetadata$rid <- paste0("rid_", 1:length(target_regions))
mcols(target_regions)$rid <- paste0("rid_", 1:length(target_regions))

r_dat = as.data.frame(rowData(x = m))
r_dat$seqnames <- as.character(r_dat$chr)
Expand All @@ -54,7 +54,7 @@ get_region_summary = function(m, regions = NULL, type = "M", how = "mean", overl
r_dat <- GenomicRanges::makeGRangesFromDataFrame(r_dat, keep.extra.columns = FALSE)


if(!all(elementMetadata.col %in% colnames(m@elementMetadata))) stop("variables provided to elementMetadata.col not correct")
if(!all(elementMetadata.col %in% colnames(rowData(m)))) stop("variables provided to elementMetadata.col not correct")

if(verbose){
message("-Checking for overlaps..\n")
Expand Down Expand Up @@ -280,7 +280,7 @@ coverage_filter <- function(m, cov_thr = 1, min_samples = 1, prop_samples=0, gro
stop("min_samples and prop_samples variables are not numeric.")
}

if (!is.null(group) && !(group %in% colnames(m@colData))){
if (!is.null(group) && !(group %in% colnames(colData(m)))){
stop(paste("The column name ", group, " can't be found in colData. Please provid a valid group column."))
}

Expand All @@ -296,11 +296,11 @@ coverage_filter <- function(m, cov_thr = 1, min_samples = 1, prop_samples=0, gro
if (n_chunks == 1) {
cov_dat = get_matrix(m = m, type = "C")
if (!is.null(group)) {
row_idx <- sapply(unique(m@colData[, group]), function(c) {
res <- DelayedMatrixStats::rowSums2(cov_dat[, m@colData[,
row_idx <- sapply(unique(colData(m)[, group]), function(c) {
res <- DelayedMatrixStats::rowSums2(cov_dat[, colData(m)[,
group] == c] >= cov_thr, na.rm = TRUE)
row_idx <- (res >= max(min_samples, ceiling(prop_samples *
sum(m@colData[, group] == c))))
sum(colData(m)[, group] == c))))
})
row_idx <- DelayedMatrixStats::rowAlls(row_idx)
} else {
Expand All @@ -314,9 +314,9 @@ coverage_filter <- function(m, cov_thr = 1, min_samples = 1, prop_samples=0, gro
function(i) {
cov_dat = get_matrix(m[((i - 1) * ceiling(nrow(m)/n_chunks) + 1):min(i * ceiling(nrow(m)/n_chunks), nrow(m)), ],
type = "C")
row_idx <- sapply(unique(m@colData[, group]), function(c) {
res <- DelayedMatrixStats::rowSums2(cov_dat[, m@colData[,group] == c] >= cov_thr, na.rm = TRUE)
row_idx <- (res >= max(min_samples, ceiling(prop_samples * sum(m@colData[, group] == c))))
row_idx <- sapply(unique(colData(m)[, group]), function(c) {
res <- DelayedMatrixStats::rowSums2(cov_dat[, colData(m)[,group] == c] >= cov_thr, na.rm = TRUE)
row_idx <- (res >= max(min_samples, ceiling(prop_samples * sum(colData(m)[, group] == c))))
})
row_idx <- DelayedMatrixStats::rowAlls(row_idx)
}))
Expand All @@ -333,10 +333,10 @@ coverage_filter <- function(m, cov_thr = 1, min_samples = 1, prop_samples=0, gro
} else {
cov_dat = get_matrix(m = m, type = "C")
if (!is.null(group)) {
row_idx <- sapply(unique(m@colData[, group]), function(c) {
res <- matrixStats::rowSums2(cov_dat[, m@colData[, group] ==
row_idx <- sapply(unique(colData(m)[, group]), function(c) {
res <- matrixStats::rowSums2(cov_dat[, colData(m)[, group] ==
c] >= cov_thr, na.rm = TRUE)
row_idx <- (res >= max(min_samples, ceiling(prop_samples * sum(m@colData[, group] == c))))
row_idx <- (res >= max(min_samples, ceiling(prop_samples * sum(colData(m)[, group] == c))))
})
row_idx <- matrixStats::rowAlls(row_idx)
} else {
Expand Down Expand Up @@ -436,7 +436,7 @@ methrix2bsseq <- function(m) {

b <- bsseq::BSseq(M = M_clean, Cov = get_matrix(m, type = "C"), pData = colData(x = m),
pos = rowData(x = m)[, "start"], chr = rowData(x = m)[, "chr"],
sampleNames = rownames(m@colData))
sampleNames = rownames(colData(m)))
b
}

Expand Down Expand Up @@ -593,11 +593,11 @@ mask_methrix <- function(m, low_count = NULL, high_quantile = 0.99, n_cores=1) {
quantiles <- simplify2array(mclapply(mc.cores = n_cores, 1:ncol(assays(m)[[2]]),
function(i) quantile(assays(m)[[2]][, i], probs = high_quantile, na.rm = TRUE)))}
quantiles <- as.vector(quantiles)
names(quantiles) <- rownames(m@colData)
names(quantiles) <- rownames(colData(m))
} else {
quantiles <- matrixStats::colQuantiles(assays(m)[[2]], probs = high_quantile, na.rm = TRUE)
quantiles <- as.vector(quantiles)
names(quantiles) <- rownames(m@colData)
names(quantiles) <- rownames(colData(m))
}


Expand Down Expand Up @@ -648,19 +648,19 @@ combine_methrix <- function(m1, m2, by = c("row", "col")) {
by <- match.arg(arg = by, choices = c("row", "col"), several.ok = FALSE)

if (by == "row") {
if (nrow(colData(m1))!= nrow(colData(m2)) || !(all(rownames(m1@colData) == rownames(m2@colData)))) {
if (nrow(colData(m1))!= nrow(colData(m2)) || !(all(rownames(colData(m1)) == rownames(colData(m2))))) {
stop("You have different samples in your dataset. You need the same samples in your datasets. ")
} else {
m <- rbind(m1, m2)
}
if (any(duplicated((as.data.table(m@elementMetadata))))) {
if (any(duplicated((as.data.table(rowData(m)))))) {
stop("There are overlapping regions in your datasets. This function only takes distinct objects. ")
}
}
if (by == "col") {
if (any(rownames(m1@colData) %in% rownames(m2@colData))) {
if (any(rownames(colData(m1)) %in% rownames(colData(m2)))) {
stop("You have the same samples in your datasets. You need different samples for this merging. ")
} else if (!identical(m1@elementMetadata, m2@elementMetadata)) {
} else if (!identical(rowData(m1), rowData(m2))) {
stop("You have to have the same regions in your datasets. ")
} else {
m <- cbind(m1, m2)
Expand Down Expand Up @@ -728,7 +728,7 @@ get_stats <- function(m, per_chr = TRUE) {
idcol = "Sample_Name")
stats <- merge(meth_stat, cov_stat, by = c("chr", "Sample_Name"))
colnames(stats)[1] <- "Chromosome"
stats$Chromosome <- factor(x = stats$Chromosome, levels = m@metadata$chrom_sizes$contig)
stats$Chromosome <- factor(x = stats$Chromosome, levels = metadata(m)$chrom_sizes$contig)
} else {
if (is_h5(m)) {
cov_stat <- data.table::data.table(Sample_Name = colnames(m),
Expand Down Expand Up @@ -842,7 +842,7 @@ convert_HDF5_methrix <- function(m = NULL) {

assays(m)[[1]] <- as.matrix(assays(m)[[1]])
assays(m)[[2]] <- as.matrix(assays(m)[[2]])
m@metadata$is_h5 <- FALSE
metadata(m)$is_h5 <- FALSE
return(m)
}

Expand All @@ -866,8 +866,8 @@ convert_methrix <- function(m = NULL) {
}

m <- create_methrix(beta_mat = assays(m)[[1]], cov_mat = assays(m)[[2]],
cpg_loci = m@elementMetadata, is_hdf5 = TRUE, genome_name = m@metadata$genome,
col_data = m@colData, chrom_sizes = m@metadata$chrom_sizes, ref_cpg_dt = m@metadata$ref_CpG,
desc = m@metadata$descriptive_stats)
cpg_loci = rowData(m), is_hdf5 = TRUE, genome_name = metadata(m)$genome,
col_data = colData(m), chrom_sizes = metadata(m)$chrom_sizes, ref_cpg_dt = metadata(m)$ref_CpG,
desc = metadata(m)$descriptive_stats)
return(m)
}
8 changes: 4 additions & 4 deletions R/methrix_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ prepare_plot_data <- function(m, ranges = NULL, n_cpgs = 25000, pheno = NULL){

if (!is.null(pheno)) {
if (pheno %in% colnames(colData(m))) {
colnames(meth_sub) <- as.character(m@colData[, pheno])
colnames(meth_sub) <- as.character(colData(m)[, pheno])
} else {
stop("Please provide a valid phenotype annotation column.")
}
Expand Down Expand Up @@ -366,8 +366,8 @@ plot_coverage <- function(m, type = c("hist", "dens"), pheno = NULL, perGroup =
if (nrow(m) > size.lim) {
message("The dataset is bigger than the size limit. A random subset of the object will be used that contains ~",
size.lim, " observations.")
n_rows <- trunc(size.lim/nrow(m@colData))
sel_rows <- sample(seq_len(nrow(m@elementMetadata)), size = n_rows,
n_rows <- trunc(size.lim/nrow(colData(m)))
sel_rows <- sample(seq_len(nrow(rowData(m))), size = n_rows,
replace = FALSE)

meth_sub <- methrix::get_matrix(m = m[sel_rows, ], type = "C",
Expand All @@ -384,7 +384,7 @@ plot_coverage <- function(m, type = c("hist", "dens"), pheno = NULL, perGroup =
if (pheno %in% colnames(colData(m)) == 0) {
stop("Phenotype annotation cannot be found in colData(m).")
}
colnames(meth_sub) <- m@colData[, pheno]
colnames(meth_sub) <- colData(m)[, pheno]
}

meth_sub <- as.data.frame(meth_sub)
Expand Down
14 changes: 7 additions & 7 deletions R/methrix_report.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ methrix_report <- function(meth, output_dir = NULL, recal_stats = FALSE,
if (!recal_stats) {
warning("If input methrix is a subsetted version of original methrix object, set recal_stats to TRUE",
immediate. = TRUE)
if (is.null(meth@metadata$descriptive_stats)) {
if (is.null(metadata(meth)$descriptive_stats)) {
stop("No previous statistics is available. Set recal_stats to TRUE.")
}
}
Expand Down Expand Up @@ -56,7 +56,7 @@ methrix_report <- function(meth, output_dir = NULL, recal_stats = FALSE,
per_chr_stat <- get_stats(m = meth, per_chr = TRUE)
gc()
} else {
per_chr_stat <- meth@metadata$descriptive_stats$chr_stat
per_chr_stat <- metadata(meth)$descriptive_stats$chr_stat
colnames(per_chr_stat)[which(colnames(per_chr_stat) == "chr")] <- "Chromosome"
}
data.table::fwrite(x = per_chr_stat, file = of1, sep = "\t")
Expand All @@ -76,7 +76,7 @@ methrix_report <- function(meth, output_dir = NULL, recal_stats = FALSE,
genome_stat <- get_stats(m = meth, per_chr = FALSE)
gc()
} else {
genome_stat <- meth@metadata$descriptive_stats$genome_stat
genome_stat <- metadata(meth)$descriptive_stats$genome_stat
}
data.table::fwrite(x = genome_stat, file = of2, sep = "\t")
}
Expand All @@ -88,7 +88,7 @@ methrix_report <- function(meth, output_dir = NULL, recal_stats = FALSE,
} else {
of3 <- suppressWarnings(normalizePath(file.path(output_dir, "n_covered_per_chr.tsv")))
}
contig_nCpGs <- meth@metadata$ref_CpG
contig_nCpGs <- metadata(meth)$ref_CpG
colnames(contig_nCpGs) <- c("chr", "total_CpGs")
if (file.exists(of3)) {
message("File already present. Skipping step 3..")
Expand All @@ -108,7 +108,7 @@ methrix_report <- function(meth, output_dir = NULL, recal_stats = FALSE,
#non_cov_tbl[, `:=`(n_covered, total_CpGs - n_non_covered)]
#non_cov_tbl[, `:=`(n_non_covered, NULL)]
} else {
non_cov_tbl <- data.table::melt(meth@metadata$descriptive_stats$n_cpgs_covered,
non_cov_tbl <- data.table::melt(metadata(meth)$descriptive_stats$n_cpgs_covered,
id.vars = "chr")
colnames(non_cov_tbl) <- c("chr", "Sample_Name", "n_covered")
non_cov_tbl <- merge(non_cov_tbl, contig_nCpGs, by = "chr",
Expand Down Expand Up @@ -147,7 +147,7 @@ methrix_report <- function(meth, output_dir = NULL, recal_stats = FALSE,
mf_chr_summary <- mf_chr_summary[na_vec == FALSE]
mf_chr_summary[, `:=`(na_vec, NULL)]
colnames(mf_chr_summary) <- c("chr", "n_CpG")
mf_chr_summary <- merge(meth@metadata$ref_CpG, mf_chr_summary)
mf_chr_summary <- merge(metadata(meth)$ref_CpG, mf_chr_summary)
colnames(mf_chr_summary)[2] <- c("total_CpGs")
mf_chr_summary[, `:=`(fract_CpG, n_CpG/total_CpGs)]
data.table::fwrite(x = mf_chr_summary, file = of4, sep = "\t")
Expand Down Expand Up @@ -196,7 +196,7 @@ methrix_report <- function(meth, output_dir = NULL, recal_stats = FALSE,
of5 <- suppressWarnings(normalizePath(file.path(output_dir, "contig_lens.tsv")))
}

data.table::fwrite(x = meth@metadata$chrom_sizes, file = of5, sep = "\t")
data.table::fwrite(x = metadata(meth)$chrom_sizes, file = of5, sep = "\t")

message(paste0("Knitting report"))
md <- system.file("report", "summarize_methrix.Rmd", package = "methrix")
Expand Down
4 changes: 2 additions & 2 deletions R/snp_removal.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ remove_snps <- function(m, populations = NULL, maf_threshold = 0.01, reduce_filt

start_proc_time <- proc.time()

genome <- m@metadata$genome
genome <- metadata(m)$genome
chr <- m2 <- NULL


Expand Down Expand Up @@ -143,7 +143,7 @@ remove_snps <- function(m, populations = NULL, maf_threshold = 0.01, reduce_filt
snp_rows <- c(snp_rows, snp_test)
}

removed_snps <- data.table::as.data.table(m@elementMetadata)[snp_rows, ]
removed_snps <- data.table::as.data.table(rowData(m))[snp_rows, ]

message("Number of SNPs removed:")
print(removed_snps[, .N, by = chr])
Expand Down
6 changes: 3 additions & 3 deletions R/write_bigwigs.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ write_bigwigs = function(m, output_dir = getwd(), samp_names = NULL){

mat_gr <- methrix::get_matrix(m = m, type = "M", add_loci = TRUE, in_granges = TRUE)

seql = m@metadata$chrom_sizes$length
names(seql) = m@metadata$chrom_sizes$contig
seql = metadata(m)$chrom_sizes$length
names(seql) = metadata(m)$chrom_sizes$contig

all_samps = names(mcols(mat_gr))

Expand All @@ -42,4 +42,4 @@ write_bigwigs = function(m, output_dir = getwd(), samp_names = NULL){
rtracklayer::export(samp_gr, con = paste0(output_dir, "/", samp, ".bw"), format="bigWig")
}
message("----------------------")
}
}
Binary file modified data/methrix_data.rda
Binary file not shown.
2 changes: 1 addition & 1 deletion inst/script/data_generation.R
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ usethis::use_data(methrix_data, overwrite = TRUE)

meth <- get_matrix(methrix_data, type="M")
cov <- get_matrix(methrix_data, type="C")
gr <- methrix_data@elementMetadata
gr <- rowData(methrix_data)


#create M and U values
Expand Down
2 changes: 1 addition & 1 deletion man/get_region_summary.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions tests/testthat/test-get_matrix.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ data("methrix_data")
m1 <- methrix_data
m2 <- convert_methrix(methrix_data)

dt <- as.data.frame(cbind(m1@elementMetadata, m1@assays$data$beta))
dt <- as.data.frame(cbind(rowData(m1), assay(m1, "beta")))
data.table::setDT(x = dt)
dt2 <- assays(m1)$beta

dt_c <- setDT(as.data.frame(cbind(m1@elementMetadata, m1@assays$data$cov)))
dt_c <- setDT(as.data.frame(cbind(rowData(m1), assay(m1, "cov"))))
dt2_c <- assays(m1)$cov


Expand All @@ -19,11 +19,11 @@ test_that("Check output", {
expect_equivalent(get_matrix(m = m1, type = "C",add_loci = FALSE, in_granges = FALSE), dt2_c)
expect_equivalent(get_matrix(m = m1, type = "C",add_loci = TRUE, in_granges = FALSE), dt_c)
})
dt_gr <- as.data.frame(cbind(m1@elementMetadata, m1@assays$data$beta))
dt_gr <- as.data.frame(cbind(rowData(m1), assay(m1, "beta")))
dt_gr$end <- dt_gr$start+2
dt_gr <- GenomicRanges::makeGRangesFromDataFrame(dt_gr, keep.extra.columns = TRUE)

dt_c_gr <- as.data.frame(cbind(m1@elementMetadata, m1@assays$data$cov))
dt_c_gr <- as.data.frame(cbind(rowData(m1), assay(m1, "cov")))
dt_c_gr$end <- dt_c_gr$start+2
dt_c_gr <- GenomicRanges::makeGRangesFromDataFrame(dt_c_gr, keep.extra.columns = TRUE)

Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-get_region_summary.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

library(GenomicRanges)

gr <- GenomicRanges::GRanges(paste0(methrix_data@elementMetadata[1,1], ":", methrix_data@elementMetadata[1,2], "-", methrix_data@elementMetadata[3,2]))
gr <- GenomicRanges::GRanges(paste0(rowData(methrix_data)[1,1], ":", rowData(methrix_data)[1,2], "-", rowData(methrix_data)[3,2]))
data("methrix_data")


Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-region_filter.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ data('methrix_data')
m2 <- convert_methrix(methrix_data)


gr <- GenomicRanges::GRanges(paste0(methrix_data@elementMetadata[1,1], ":", methrix_data@elementMetadata[1,2], "-", methrix_data@elementMetadata[3,2]+1))
gr <- GenomicRanges::GRanges(paste0(rowData(methrix_data)[1,1], ":", rowData(methrix_data)[1,2], "-", rowData(methrix_data)[3,2]+1))
gr_df <- as.data.frame(gr)
gr_df2 <- gr_df
colnames(gr_df2)[1] <- "chr"
Expand Down
Loading

0 comments on commit 219929f

Please sign in to comment.