Skip to content

Commit

Permalink
version bump
Browse files Browse the repository at this point in the history
Merge branch 'master' into RELEASE_3_12

# Conflicts:
#	DESCRIPTION
  • Loading branch information
PoisonAlien committed Feb 9, 2021
2 parents 82d505c + 3f01422 commit b622315
Show file tree
Hide file tree
Showing 10 changed files with 65 additions and 33 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: methrix
Title: Fast and efficient summarization of generic bedGraph files from Bisufite sequencing
Version: 1.4.0
Version: 1.4.05
Authors@R: c(
person("Anand","Mayakonda", role = c("aut", "cre"), email = "anand_mt@hotmail.com", comment = c(ORCID = "0000-0003-1162-687X")),
person("Reka","Toth", role = "aut", email = "r.toth@dkfz.de", comment = c(ORCID = "0000-0002-6096-1052")),
Expand Down
8 changes: 7 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
CHANGES IN VERSION 1.4.00 (For Bioconductor 3.12 release)
CHANGES IN VERSION 1.4.05
-------------------------
o rotate x axis labels by 45 degrees in report
o update `methrix_pca()` screeplot
o added CITATION

CHANGES IN VERSION 1.4.00 (Bioconductor version)
-------------------------
o read_bedgraphs() supports bedgraphs files from "Bismark_cov", "MethylDackel", "MethylcTools", "BisSNP", "BSseeker2_CGmap"
o write_bedgraphs() supports output to multiBed and metilene file formats
Expand Down
8 changes: 4 additions & 4 deletions R/accessory_funcs.R
Original file line number Diff line number Diff line change
Expand Up @@ -448,9 +448,9 @@ non_vect_code <- function(files, col_idx, coldata, verbose = TRUE, genome = NULL
file_uncovered = file_uncovered, zero_based = zero_based)

DelayedArray::write_block(block = as.matrix(b$bdg[, .(beta)]),
viewport = grid[[i]], x = M_sink)
viewport = grid[[i]], sink = M_sink)
DelayedArray::write_block(block = as.matrix(b$bdg[, .(cov)]),
viewport = grid[[i]], x = cov_sink)
viewport = grid[[i]], sink = cov_sink)
genome_stat_final <- b$genome_stat[, `:=`(Sample_Name,
rownames(coldata)[i])]
chr_stat_final <- b$chr_stat[, `:=`(Sample_Name, rownames(coldata)[i])]
Expand All @@ -463,9 +463,9 @@ non_vect_code <- function(files, col_idx, coldata, verbose = TRUE, genome = NULL
file_uncovered = file_uncovered, zero_based = zero_based)

DelayedArray::write_block(block = as.matrix(b$bdg[, .(beta)]),
viewport = grid[[i]], x = M_sink)
viewport = grid[[i]], sink = M_sink)
DelayedArray::write_block(block = as.matrix(b$bdg[, .(cov)]),
viewport = grid[[i]], x = cov_sink)
viewport = grid[[i]], sink = cov_sink)
genome_stat_final <- rbind(genome_stat_final, b$genome_stat[,
`:=`(Sample_Name, rownames(coldata)[i])])
chr_stat_final <- rbind(chr_stat_final, b$chr_stat[, `:=`(Sample_Name,
Expand Down
6 changes: 3 additions & 3 deletions R/extract_CpGs.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ extract_CPGs = function(ref_genome = NULL) {
ref_build = NA
}

chrom_sizes = data.table::data.table(contig = names(seqlengths(x = ref_genome)),
length = seqlengths(x = ref_genome))
chrs = names(ref_genome)
chrom_sizes = data.table::data.table(contig = standardChromosomes(ref_genome),
length = seqlengths(x = ref_genome)[names(seqlengths(x = ref_genome)) %in% standardChromosomes(ref_genome)])
chrs = standardChromosomes(ref_genome)

message("-Extracting CpGs")

Expand Down
20 changes: 8 additions & 12 deletions R/methrix_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -219,18 +219,14 @@ methrix_pca <- function(m, var = "top", top_var = 1000, ranges = NULL,
# Draw cumulative variance explained by PCs

if (do_plot) {
par(bty = "n", mgp = c(2.5, 0.5, 0), mar = c(3, 4, 2, 2) + 0.1,
tcl = -0.25, las = 1)
plot(pc_vars, type = "h", col = "red", xlab = "", ylab = "variance Explained",
ylim = c(0, 1), yaxs = "i")
mtext(side = 1, "Principal component", line = 2)
cum_var <- cumsum(meth_pca$sdev^2)/sum(meth_pca$sdev^2) * meth_pca$sdev[1]^2/sum(meth_pca$sdev^2)
lines(cumsum(cum_var), type = "s")
axis(side = 4, at = pretty(c(0, 1)), labels = pretty(c(0, 1)))
legend("topright", col = c("red", "black"), lty = 1, c("Per PC", "Cumulative"), bty = "n")
#lines(x = c(length(meth_pca$sdev), n_pc, n_pc), y = c(cum_var[n_pc], cum_var[n_pc], 0), lty = 3)
title(main = paste0("Variance explained by ", n_pc, " PC: ", round(sum(c(meth_pca$sdev^2/sum(meth_pca$sdev^2))[seq_len(n_pc)]),
digits = 2)), adj = 0)
par(mar = c(3, 4, 1, 4))
b = barplot(height = pc_vars, names.arg = NA, col = "#2c3e50", ylim = c(0, 1), las = 2, axes = FALSE, ylab = "Variance Explained")
points(x = b, y = cumsum(pc_vars), type = 'l', lty = 2, lwd = 1.2, xpd = TRUE, col = "#c0392b")
points(x = b, y = cumsum(pc_vars), type = 'p', pch = 19, xpd = TRUE, col = "#c0392b")
mtext(text = paste0("PC", 1:length(pc_vars)), side = 1, at = b, las = 2, line = 0.5, cex = 0.8)
axis(side = 2, at = seq(0, 1, 0.1), line = 0, las = 2, cex.axis = 0.8)
axis(side = 4, at = seq(0, 1, 0.1), line = 0, las = 2, cex.axis = 0.8)
legend(x = "topleft", legend = "Cumulative", col = "#c0392b", pch = 19, lwd = 1, cex = 0.75, bty = "n")
}
#-----------------------------------------------------------------------------------------------------------------------

Expand Down
16 changes: 10 additions & 6 deletions R/read_bedgraphs.R
Original file line number Diff line number Diff line change
Expand Up @@ -101,24 +101,28 @@ read_bedgraphs <- function(files = NULL, pipeline = NULL, zero_based = TRUE,
}
}

if (is.null(contigs)) {
# Work with only main contigs (either with chr prefix - UCSC style, or
# ensemble style)
contigs <- c(paste0("chr", c(seq_len(22), "X", "Y", "M")), seq_len(22),
"X", "Y", "MT")
}


# Extract CpG's
conig_lens <- NA
if (is(ref_cpgs, "list") & all(names(ref_cpgs) == c("cpgs", "contig_lens",
"release_name"))) {
if (is.null(contigs)) {
contigs <- ref_cpgs$contig_lens$contig
}
conig_lens <- ref_cpgs$contig_lens[contig %in% contigs]
genome <- data.table::copy(x = ref_cpgs$cpgs)
} else if (any(is(ref_cpgs[1], "character"), is(ref_cpgs[1], "BSgenome"))) {
genome <- extract_CPGs(ref_genome = ref_cpgs)
if (is.null(contigs)) {
contigs <- genome$contig_lens$contig
}
conig_lens <- genome$contig_lens
genome <- genome$cpgs
} else if (is(ref_cpgs[1], "data.frame")) {
if (is.null(contigs)) {
contigs <- unique(genome$chr)
}
genome <- data.table::copy(x = ref_cpgs)
data.table::setkey(x = genome, chr, start)
} else {
Expand Down
9 changes: 5 additions & 4 deletions R/snp_removal.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ remove_snps <- function(m, populations = NULL, maf_threshold = 0.01, reduce_filt
} else {
stop("Only hg19 and hg38 genomes are currently supported..\n")
}

if (is.null(populations)) {
populations <- GenomicScores::populations(mafdb)
} else if (!all(populations %in% GenomicScores::populations(mafdb))) {
Expand All @@ -70,14 +71,14 @@ remove_snps <- function(m, populations = NULL, maf_threshold = 0.01, reduce_filt
}


regions <- gr.nochr(GenomicRanges::makeGRangesFromDataFrame(elementMetadata(m), start.field = "start", end.field = "start"))

regions <- GenomicRanges::makeGRangesFromDataFrame(elementMetadata(m), start.field = "start", end.field = "start")
seqlevelsStyle(regions) <- "NCBI"

if(n_cores==1) {
snp_rows<-which((rowSums(as.matrix(score(mafdb, regions, pop = populations))>= maf_threshold,na.rm=T)>0) |
snp_rows <- which((rowSums(as.matrix(score(mafdb, regions, pop = populations))>= maf_threshold,na.rm=T)>0) |
(rowSums(as.matrix(score(mafdb, IRanges::shift(regions, 1), pop = populations))>= maf_threshold,na.rm=T)>0))
} else {
snp_rows<-which(unlist(mclapply(mc.cores=n_cores,1:n_chunks, function(i){
snp_rows <- which(unlist(mclapply(mc.cores=n_cores,1:n_chunks, function(i){
sub_dat = regions[((i-1)*ceiling(length(regions)/n_chunks)+1):min(i*ceiling(length(regions)/n_chunks),length(regions))]
(rowSums(as.matrix(score(mafdb, sub_dat, pop = populations))>= maf_threshold,na.rm=T)>0) |
(rowSums(as.matrix(score(mafdb, IRanges::shift(sub_dat, 1), pop = populations))>= maf_threshold,na.rm=T)>0)
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ Bedgraph files generated by BS pipelines often come in various flavors. Critical

* A exemplary complete data analysis with steps from reading in to annotation and differential methylation calling can be find in our [WGBS best practices workflow](https://compepigen.github.io/methrix_docs/articles/methrix.html).

### Citation

**_Mayakonda A, Schönung M, Hey J, Batra RN, Feuerstein-Akgoz C, Köhler K, Lipka DB, Sotillo R, Plass C, Lutsik P, Toth R. Methrix: an R/bioconductor package for systematic aggregation and analysis of bisulfite sequencing data. Bioinformatics. 2020 Dec 21:btaa1048. doi: 10.1093/bioinformatics/btaa1048. Epub ahead of print. PMID: [33346800](https://pubmed.ncbi.nlm.nih.gov/33346800/)._**

### Installation

```r
Expand Down
21 changes: 21 additions & 0 deletions inst/CITATION
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
citHeader("methrix can be cited as:")

citEntry(
entry="article",
title = "Methrix: an R/Bioconductor package for systematic aggregation and analysis of bisulfite sequencing data",
author = personList(as.person("Anand Mayakonda"),
as.person("Maximilian Schonung"),
as.person("Joschka Hey"),
as.person("Rajbir Nath Batra1"),
as.person("Clarissa Feuerstein-Akgoz"),
as.person("Kristin Kohler"),
as.person("Daniel B. Lipka"),
as.person("Rocio Sotillo"),
as.person("Christoph Plass"),
as.person("Pavlo Lutsik"),
as.person("Reka Toth")),
journal = "Bioinformatics",
year = 2020,
doi = "https://doi.org/10.1093/bioinformatics/btaa1048",
textVersion = "Anand Mayakonda, Maximilian Schonung, Joschka Hey1, Rajbir Nath Batra, Clarissa Feuerstein-Akgoz1, Kristin Kohler, Daniel B. Lipka, Rocio Sotillo, Christoph Plass, Pavlo Lutsik and Reka Toth. 2020. Methrix: an R/Bioconductor package for systematic aggregation and analysis of bisulfite sequencing data. Bioinformatics. https://doi.org/10.1093/bioinformatics/btaa1048"
)
4 changes: 2 additions & 2 deletions inst/report/summarize_methrix.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ contig_lens = data.table::fread(input = params$chr_lens)
chr_order = contig_lens$contig[contig_lens$contig %in% unique(n_covered[,chr])]
n_covered[, chr := factor(as.character(chr), levels = chr_order)]
p = ggplot(data = n_covered, aes(x = chr, y = n_covered, label = Sample_Name))+geom_boxplot()+geom_jitter(size = 0.6)+theme_minimal()+theme(axis.title.x = element_blank(), axis.title.y = element_blank())
p = ggplot(data = n_covered, aes(x = chr, y = n_covered, label = Sample_Name))+geom_boxplot()+geom_jitter(size = 0.6)+theme_minimal()+theme(axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1))
plotly::ggplotly(p = p)
```

### Fraction
```{r}
p = ggplot(data = n_covered, aes(x = chr, y = fraction, label = Sample_Name))+geom_boxplot()+geom_jitter(size = 0.6)+theme_minimal()+theme(axis.title.x = element_blank(), axis.title.y = element_blank())
p = ggplot(data = n_covered, aes(x = chr, y = fraction, label = Sample_Name))+geom_boxplot()+geom_jitter(size = 0.6)+theme_minimal()+theme(axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text.x = element_text(angle = 45, hjust = 1))
plotly::ggplotly(p = p)
```
Number of reference CpGs covered by each sample in each chromosome
Expand Down

0 comments on commit b622315

Please sign in to comment.