Merge branch 'feature/PlotOverlap'

mevers · Oct 12, 2019 · cbda5f1 · cbda5f1
2 parents ae0d28d + 1e1a0e6
commit cbda5f1
Show file tree

Hide file tree

Showing 7 changed files with 194 additions and 48 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: RNAModR
 Type: Package
 Title: Functional Analysis of RNA Modifications
-Version: 0.2.2
+Version: 0.2.3
 Author: Maurits Evers <maurits.evers@anu.edu.au>
 Maintainer: Maurits Evers <maurits.evers@anu.edu.au>
 Description: Transcriptome-wide analysis of RNA modifications.
@@ -13,6 +13,7 @@ Imports:
     gplots,
     graphics,
     grDevices,
+    magrittr,
     methods,
     RSQLite,
     S4Vectors,

diff --git a/NAMESPACE b/NAMESPACE
@@ -62,6 +62,7 @@ importFrom(S4Vectors,DataFrame)
 importFrom(S4Vectors,queryHits)
 importFrom(S4Vectors,subjectHits)
 importFrom(beanplot,beanplot)
+importFrom(gplots,venn)
 importFrom(grDevices,col2rgb)
 importFrom(grDevices,rgb)
 importFrom(graphics,abline)
@@ -77,6 +78,7 @@ importFrom(graphics,plot)
 importFrom(graphics,points)
 importFrom(graphics,polygon)
 importFrom(graphics,text)
+importFrom(graphics,title)
 importFrom(stats,binomial)
 importFrom(stats,confint.default)
 importFrom(stats,fisher.test)

diff --git a/R/plot.R b/R/plot.R
@@ -1294,3 +1294,74 @@ PlotRelStartStopEnrichment <- function(txLoc1,
 }
 
 
+#' Plot overlap of sites.
+#'
+#' Plot overlap of sites from two \code{txLoc} objects. See 'Details'.
+#'
+#' The function plots one or multiple Venn diagrams showing the spatial 
+#' overlap between entries from two \code{txLoc} objects. Two features are 
+#' defined as overlapping, if they overlap by at least one nucleotide. 
+#' Overlaps are determined using the function 
+#' \code{GenomicRanges::countOverlaps}.
+#'
+#' @param txLoc1 A \code{txLoc} object.
+#' @param txLoc2 A \code{txLoc} object.
+#'
+#' @return \code{NULL}.
+#'
+#' @author Maurits Evers, \email{maurits.evers@@anu.edu.au}
+#'
+#' @import GenomicRanges IRanges
+#' @importFrom gplots venn
+#' @importFrom graphics title
+PlotOverlap <- function(txLoc1, txLoc2) {
+
+    # Sanity check
+    CheckClassTxLocConsistency(txLoc1, txLoc2)
+
+    # Determine figure panel layout
+    if (length(GetRegions(txLoc1)) < 4) {
+        par(mfrow = c(1, length(GetRegions(txLoc1))))
+    } else {
+        par(mfrow = c(ceiling(length(GetRegions(txLoc1)) / 2), 2))
+    }
+
+    invisible(Map(
+        function(loci1, loci2, region) {
+
+            # Get transcriptome coordinates
+            gr1 <- loci1$locus_in_tx_region
+            gr2 <- loci2$locus_in_tx_region
+
+            # Make sure that seqlevels match
+            lvls <- intersect(seqlevels(gr1), seqlevels(gr2))
+            seqlevels(gr1, pruning.mode = "coarse") <- lvls
+            seqlevels(gr2, pruning.mode = "coarse") <- lvls
+
+            # Count overlap
+            m <- countOverlaps(gr1, gr2)
+            overlap <- sum(m > 0)
+
+            # Plot
+            grps <- list(
+                seq(1, length(gr1)),
+                seq(length(gr1) - overlap + 1, length.out = length(gr2)))
+            names(grps) <- c(
+                sprintf(
+                    "%s (%3.2f%%)",
+                    GetId(txLoc1), overlap / length(gr1) * 100),
+                sprintf(
+                    "%s (%3.2f%%)",
+                    GetId(txLoc2), overlap / length(gr2) * 100))
+            venn(grps)
+            title(region)
+            mtext(names(gr1))
+
+        },
+        GetLoci(txLoc1),
+        GetLoci(txLoc2),
+        GetRegions(txLoc1)))
+
+}
+
+
diff --git a/R/unused.R b/R/unused.R
@@ -1,50 +1,3 @@
-##' Plot overlap of sites.
-##'
-##' Plot overlap of sites from two \code{txLoc} object.
-##' See 'Details'.
-##'
-##' The function plots one or multiple Venn diagrams denoting the
-##' spatial overlap between entries from two \code{txLoc} objects.
-##' Two features are defined as overlapping, if they overlap by
-##' at least one nucleotide. Overlaps are determined using the
-##' function \code{GenomicRanges::countOverlaps}.
-##'
-##'
-##' @param txLoc1 A \code{txLoc} object.
-##' @param txLoc2 A \code{txLoc} object.
-##'
-##' @author Maurits Evers, \email{maurits.evers@@anu.edu.au}
-##'
-##' @import GenomicRanges IRanges
-##' @importFrom gplots venn
-#PlotOverlap <- function(loc1, loc2) {
-#    CheckClassTxLocConsistency(loc1, loc2)
-#    id1 <- GetId(loc1)
-#    id2 <- GetId(loc2)
-#    gr1 <- TxLoc2GRangesList(loc1)
-#    gr2 <- TxLoc2GRangesList(loc2)
-#    if (length(gr1) < 4) {
-#        par(mfrow = c(1, length(gr1)))
-#    } else {
-#        par(mfrow = c(ceiling(length(gr1) / 2), 2))
-#    }
-#    for (i in 1:length(gr1)) {
-#        # Supress warnings of sequences in gr1 not being in gr2
-#        m <- suppressWarnings(countOverlaps(gr1[[i]], gr2[[i]]))
-#        overlap <- length(m[m > 0])
-#        grps <- list(
-#            seq(1, length(gr1[[i]])),
-#            seq(length(gr1[[i]]) - overlap + 1, length.out = length(gr2[[i]])))
-#        names(grps) <- c(sprintf("%s (%3.2f%%)",
-#                                 id1, overlap / length(gr1[[i]]) * 100),
-#                         sprintf("%s (%3.2f%%)",
-#                                 id2, overlap / length(gr2[[i]]) * 100))
-#        venn(grps)
-#        mtext(names(gr1)[i])
-#    }
-#}
-#
-
 ##' Read a DBN file.
 ##'
 ##' Read a DBN file. See 'Details'.

diff --git a/man/PlotOverlap.Rd b/man/PlotOverlap.Rd
diff --git a/vignettes/.gitignore b/vignettes/.gitignore
@@ -0,0 +1,2 @@
+*.html
+*.R
diff --git a/vignettes/Example-analysis.Rmd b/vignettes/Example-analysis.Rmd
@@ -0,0 +1,88 @@
+---
+title: "Example analysis of m¹A sites"
+author: "Maurits Evers"
+output: 
+    rmarkdown::html_vignette:
+        fig_width: 10
+        fig_height: 8
+vignette: >
+  %\VignetteIndexEntry{Example-analysis}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+
+## Load the libraries
+
+We start by loading the `RNAModR` library, along with the convenience library `magrittr`.
+
+```{r setup, warning=FALSE, message=FALSE}
+library(RNAModR)
+library(magrittr)
+```
+
+We read in data for the mRNA modification N1-methyladenosine (m¹A) from [Dominissini et al.](https://www.nature.com/articles/nature16998), which is included in the `RNAModR` package. Data is provided in the [BED6 format](http://genome.ucsc.edu/FAQ/FAQformat#format1), which we can map to transcriptome coordinates using `SmartMap` and select those sites that lie within the CDS and 3'UTR. Note that if we have data in a different format (e.g. GTF), we first need to convert from GTF to BED6.
+
+
+## Load data and show summary
+
+```{r read-data}
+m1A <- system.file(
+    "extdata", "MeRIPseq_m1A_Dominissini2016_hg38.bed", package = "RNAModR") %>%
+    ReadBED() %>%
+    SmartMap(id = "m1A", refGenome = "hg38", showPb = FALSE) %>%
+    FilterTxLoc(filter = c("CDS", "3'UTR"))
+```
+
+We can get a quick summary overview of the number of sites per transcript region with
+
+```{r data-overview}
+m1A
+```
+
+
+## Generate null sites
+
+We now generate a distribution of null sites
+
+```{r generate-null}
+null <- GenerateNull(m1A, nt = "A", showPb = FALSE)
+null
+```
+
+The null sites are all nucleotides `nt` in all transcript regions that contain at least one m¹A site. Consequently, the number of null sites is signficantly larger than the number of m¹A sites. It may make sense to downsample the number of null sites to match the number of m¹A sites in every transcript region, see `?DownsampleTxLoc`.
+
+
+## Enrichment/depletion of m¹A sites within different transcript regions
+
+We explore the spatial distribution of m¹A sites relative to the null sites within the CDS and 3'UTR and perform an enrichment analysis to characterise enrichment/depletion of m¹A sites within bins along the transcript regions.
+
+```{r region-enrichment, out.width = "100%"}
+PlotSpatialEnrichment(m1A, null)
+```
+
+
+## Enrichment/depletion of m¹A sites near YTH
+
+We read in eIF4AIII HITSCLIP data from [Saulière et al.](https://www.nature.com/articles/nsmb.2420), which is included in the `RNAModR` package.
+
+```{r }
+eif4 <- system.file("extdata", "HITSCLIP_eIF4A3_Sauliere2012_hg38.bed", package = "RNAModR") %>%
+    ReadBED() %>%
+    SmartMap(id = "eif4", refGenome = "hg38", showPb = FALSE) %>%
+    FilterTxLoc(filter = c("CDS", "3'UTR"))
+eif4
+```
+
+We can now calculate relative distances between the m¹A and null sites to the nearest eIF4AIII target site; we then perform an enrichment analysis of the two binned distance distributions to characterise enrichment/depletion of m¹A sites relative to null sites as a function of the distance to the nearest eIF4AIII site. Negative distances correspond to an m¹A/null site *upstream* of the eIF4AIII site; positive distances correspond to m¹A/null sites *downstream* of the eIF4AIII site. 
+
+```{r eif4-enrichment, out.width="100%", out.height="60%"}
+PlotRelDistEnrichment(m1A, null, eif4, flank = 155, binWidth = 10)
+```