Skip to content

Commit

Permalink
Merge branch 'feature/PlotOverlap'
Browse files Browse the repository at this point in the history
  • Loading branch information
mevers committed Oct 12, 2019
2 parents ae0d28d + 1e1a0e6 commit cbda5f1
Show file tree
Hide file tree
Showing 7 changed files with 194 additions and 48 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: RNAModR
Type: Package
Title: Functional Analysis of RNA Modifications
Version: 0.2.2
Version: 0.2.3
Author: Maurits Evers <maurits.evers@anu.edu.au>
Maintainer: Maurits Evers <maurits.evers@anu.edu.au>
Description: Transcriptome-wide analysis of RNA modifications.
Expand All @@ -13,6 +13,7 @@ Imports:
gplots,
graphics,
grDevices,
magrittr,
methods,
RSQLite,
S4Vectors,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ importFrom(S4Vectors,DataFrame)
importFrom(S4Vectors,queryHits)
importFrom(S4Vectors,subjectHits)
importFrom(beanplot,beanplot)
importFrom(gplots,venn)
importFrom(grDevices,col2rgb)
importFrom(grDevices,rgb)
importFrom(graphics,abline)
Expand All @@ -77,6 +78,7 @@ importFrom(graphics,plot)
importFrom(graphics,points)
importFrom(graphics,polygon)
importFrom(graphics,text)
importFrom(graphics,title)
importFrom(stats,binomial)
importFrom(stats,confint.default)
importFrom(stats,fisher.test)
Expand Down
71 changes: 71 additions & 0 deletions R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -1294,3 +1294,74 @@ PlotRelStartStopEnrichment <- function(txLoc1,
}


#' Plot overlap of sites.
#'
#' Plot overlap of sites from two \code{txLoc} objects. See 'Details'.
#'
#' The function plots one or multiple Venn diagrams showing the spatial
#' overlap between entries from two \code{txLoc} objects. Two features are
#' defined as overlapping, if they overlap by at least one nucleotide.
#' Overlaps are determined using the function
#' \code{GenomicRanges::countOverlaps}.
#'
#' @param txLoc1 A \code{txLoc} object.
#' @param txLoc2 A \code{txLoc} object.
#'
#' @return \code{NULL}.
#'
#' @author Maurits Evers, \email{maurits.evers@@anu.edu.au}
#'
#' @import GenomicRanges IRanges
#' @importFrom gplots venn
#' @importFrom graphics title
PlotOverlap <- function(txLoc1, txLoc2) {

# Sanity check
CheckClassTxLocConsistency(txLoc1, txLoc2)

# Determine figure panel layout
if (length(GetRegions(txLoc1)) < 4) {
par(mfrow = c(1, length(GetRegions(txLoc1))))
} else {
par(mfrow = c(ceiling(length(GetRegions(txLoc1)) / 2), 2))
}

invisible(Map(
function(loci1, loci2, region) {

# Get transcriptome coordinates
gr1 <- loci1$locus_in_tx_region
gr2 <- loci2$locus_in_tx_region

# Make sure that seqlevels match
lvls <- intersect(seqlevels(gr1), seqlevels(gr2))
seqlevels(gr1, pruning.mode = "coarse") <- lvls
seqlevels(gr2, pruning.mode = "coarse") <- lvls

# Count overlap
m <- countOverlaps(gr1, gr2)
overlap <- sum(m > 0)

# Plot
grps <- list(
seq(1, length(gr1)),
seq(length(gr1) - overlap + 1, length.out = length(gr2)))
names(grps) <- c(
sprintf(
"%s (%3.2f%%)",
GetId(txLoc1), overlap / length(gr1) * 100),
sprintf(
"%s (%3.2f%%)",
GetId(txLoc2), overlap / length(gr2) * 100))
venn(grps)
title(region)
mtext(names(gr1))

},
GetLoci(txLoc1),
GetLoci(txLoc2),
GetRegions(txLoc1)))

}


47 changes: 0 additions & 47 deletions R/unused.R
Original file line number Diff line number Diff line change
@@ -1,50 +1,3 @@
##' Plot overlap of sites.
##'
##' Plot overlap of sites from two \code{txLoc} object.
##' See 'Details'.
##'
##' The function plots one or multiple Venn diagrams denoting the
##' spatial overlap between entries from two \code{txLoc} objects.
##' Two features are defined as overlapping, if they overlap by
##' at least one nucleotide. Overlaps are determined using the
##' function \code{GenomicRanges::countOverlaps}.
##'
##'
##' @param txLoc1 A \code{txLoc} object.
##' @param txLoc2 A \code{txLoc} object.
##'
##' @author Maurits Evers, \email{maurits.evers@@anu.edu.au}
##'
##' @import GenomicRanges IRanges
##' @importFrom gplots venn
#PlotOverlap <- function(loc1, loc2) {
# CheckClassTxLocConsistency(loc1, loc2)
# id1 <- GetId(loc1)
# id2 <- GetId(loc2)
# gr1 <- TxLoc2GRangesList(loc1)
# gr2 <- TxLoc2GRangesList(loc2)
# if (length(gr1) < 4) {
# par(mfrow = c(1, length(gr1)))
# } else {
# par(mfrow = c(ceiling(length(gr1) / 2), 2))
# }
# for (i in 1:length(gr1)) {
# # Supress warnings of sequences in gr1 not being in gr2
# m <- suppressWarnings(countOverlaps(gr1[[i]], gr2[[i]]))
# overlap <- length(m[m > 0])
# grps <- list(
# seq(1, length(gr1[[i]])),
# seq(length(gr1[[i]]) - overlap + 1, length.out = length(gr2[[i]])))
# names(grps) <- c(sprintf("%s (%3.2f%%)",
# id1, overlap / length(gr1[[i]]) * 100),
# sprintf("%s (%3.2f%%)",
# id2, overlap / length(gr2[[i]]) * 100))
# venn(grps)
# mtext(names(gr1)[i])
# }
#}
#

##' Read a DBN file.
##'
##' Read a DBN file. See 'Details'.
Expand Down
29 changes: 29 additions & 0 deletions man/PlotOverlap.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions vignettes/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
*.html
*.R
88 changes: 88 additions & 0 deletions vignettes/Example-analysis.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
---
title: "Example analysis of m¹A sites"
author: "Maurits Evers"
output:
rmarkdown::html_vignette:
fig_width: 10
fig_height: 8
vignette: >
%\VignetteIndexEntry{Example-analysis}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```


## Load the libraries

We start by loading the `RNAModR` library, along with the convenience library `magrittr`.

```{r setup, warning=FALSE, message=FALSE}
library(RNAModR)
library(magrittr)
```

We read in data for the mRNA modification N1-methyladenosine (m¹A) from [Dominissini et al.](https://www.nature.com/articles/nature16998), which is included in the `RNAModR` package. Data is provided in the [BED6 format](http://genome.ucsc.edu/FAQ/FAQformat#format1), which we can map to transcriptome coordinates using `SmartMap` and select those sites that lie within the CDS and 3'UTR. Note that if we have data in a different format (e.g. GTF), we first need to convert from GTF to BED6.


## Load data and show summary

```{r read-data}
m1A <- system.file(
"extdata", "MeRIPseq_m1A_Dominissini2016_hg38.bed", package = "RNAModR") %>%
ReadBED() %>%
SmartMap(id = "m1A", refGenome = "hg38", showPb = FALSE) %>%
FilterTxLoc(filter = c("CDS", "3'UTR"))
```

We can get a quick summary overview of the number of sites per transcript region with

```{r data-overview}
m1A
```


## Generate null sites

We now generate a distribution of null sites

```{r generate-null}
null <- GenerateNull(m1A, nt = "A", showPb = FALSE)
null
```

The null sites are all nucleotides `nt` in all transcript regions that contain at least one m¹A site. Consequently, the number of null sites is signficantly larger than the number of m¹A sites. It may make sense to downsample the number of null sites to match the number of m¹A sites in every transcript region, see `?DownsampleTxLoc`.


## Enrichment/depletion of m¹A sites within different transcript regions

We explore the spatial distribution of m¹A sites relative to the null sites within the CDS and 3'UTR and perform an enrichment analysis to characterise enrichment/depletion of m¹A sites within bins along the transcript regions.

```{r region-enrichment, out.width = "100%"}
PlotSpatialEnrichment(m1A, null)
```


## Enrichment/depletion of m¹A sites near YTH

We read in eIF4AIII HITSCLIP data from [Saulière et al.](https://www.nature.com/articles/nsmb.2420), which is included in the `RNAModR` package.

```{r }
eif4 <- system.file("extdata", "HITSCLIP_eIF4A3_Sauliere2012_hg38.bed", package = "RNAModR") %>%
ReadBED() %>%
SmartMap(id = "eif4", refGenome = "hg38", showPb = FALSE) %>%
FilterTxLoc(filter = c("CDS", "3'UTR"))
eif4
```

We can now calculate relative distances between the m¹A and null sites to the nearest eIF4AIII target site; we then perform an enrichment analysis of the two binned distance distributions to characterise enrichment/depletion of m¹A sites relative to null sites as a function of the distance to the nearest eIF4AIII site. Negative distances correspond to an m¹A/null site *upstream* of the eIF4AIII site; positive distances correspond to m¹A/null sites *downstream* of the eIF4AIII site.

```{r eif4-enrichment, out.width="100%", out.height="60%"}
PlotRelDistEnrichment(m1A, null, eif4, flank = 155, binWidth = 10)
```

0 comments on commit cbda5f1

Please sign in to comment.