Skip to content

Latest commit

 

History

History
339 lines (276 loc) · 13.9 KB

04_correlations.md

File metadata and controls

339 lines (276 loc) · 13.9 KB
library(tidyverse)

## ── Attaching packages ──────────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.0.9000     ✓ purrr   0.3.3     
## ✓ tibble  2.1.3          ✓ dplyr   0.8.3     
## ✓ tidyr   1.0.0          ✓ stringr 1.4.0     
## ✓ readr   1.3.1          ✓ forcats 0.4.0

## ── Conflicts ─────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(corrr)
library(cowplot)

## 
## ********************************************************

## Note: As of version 1.0.0, cowplot does not change the

##   default ggplot2 theme anymore. To recover the previous

##   behavior, execute:
##   theme_set(theme_cowplot())

## ********************************************************

library(Hmisc) # for correlations with pvalue

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

library(ggrepel)


source("./figureoptions.R")
source("./functions_RNAseq.R")

Sample information and PC1

# read the sample data, set levels, join iwth behvior PCA data
colData <- read.csv("../data/00_colData.csv", row.names = 1, stringsAsFactors = T) 
pca.Rn <- read_csv("../data/suppltable-3.csv") %>% dplyr::filter(trialNum == 9)

## Parsed with column specification:
## cols(
##   ID = col_character(),
##   treatment = col_character(),
##   trial = col_character(),
##   trialNum = col_double(),
##   PC1 = col_double(),
##   PC2 = col_double()
## )

colData <- left_join(colData, pca.Rn)

## Joining, by = c("ID", "treatment")

## Warning: Column `ID` joining factor and character vector, coercing into
## character vector

## Warning: Column `treatment` joining factor and character vector, coercing into
## character vector

head(colData)

##       ID subfield        treatment training     trial trialNum        PC1
## 1 15143A      CA3 conflict.trained  trained Retention        9 -0.2275039
## 2 15143A       DG conflict.trained  trained Retention        9 -0.2275039
## 3 15143B      CA1   conflict.yoked    yoked Retention        9 -3.1436627
## 4 15143B       DG   conflict.yoked    yoked Retention        9 -3.1436627
## 5 15143C      CA1 standard.trained  trained Retention        9  5.8860390
## 6 15143D      CA1   standard.yoked    yoked Retention        9 -2.7532719
##           PC2
## 1  3.03543738
## 2  3.03543738
## 3 -0.48834291
## 4 -0.48834291
## 5  0.33235063
## 6 -0.07584809

# read all count data prep to join with sample colData

combinePCvsd <- function(filename, whichsubfield, whichgenes){
  
  
  vsd <- read.csv(filename, row.names = 1, check.names = F) 
  vsd$gene <- row.names(vsd)
  vsd$gene <- str_to_title(vsd$gene)
  vsd <- as.data.frame(vsd)
  row.names(vsd) <- vsd$gene
  vsd$gene <- NULL
  vsd <- as.data.frame(t(vsd))
  vsd$sample <- row.names(vsd)
  vsd$mouse <- sapply(strsplit(as.character(vsd$sample),"\\-"), "[", 1)
  vsd$ID <- paste(15, vsd$mouse, sep = "")
  
  vsd <- left_join(colData, vsd) %>%
    select(ID:training, 
           PC1, PC2,
           whichgenes) %>%
    filter(subfield == whichsubfield) 
  print(head(vsd))
  return(vsd)
}

candidategenes <- c("Prkcz", "Prkci", "Wwc1", "Grin1", "Gria1",
         "Pick1", "Nsf", "Fmr1", "Camk2a",
         "Fos", "Fosl2", "Npas4", "Arc")

vsdDG <- combinePCvsd("../data/03_DG_vsdtraining.csv", "DG", candidategenes)

## Joining, by = "ID"

##       ID subfield        treatment training        PC1         PC2    Prkcz
## 1 15143A       DG conflict.trained  trained -0.2275039  3.03543738 9.052238
## 2 15143B       DG   conflict.yoked    yoked -3.1436627 -0.48834291 8.922782
## 3 15143D       DG   standard.yoked    yoked -2.7532719 -0.07584809 9.060078
## 4 15144A       DG conflict.trained  trained  6.7041815 -0.07853719 9.004395
## 5 15144C       DG standard.trained  trained  7.0499369 -1.78499206 8.903003
## 6 15144D       DG   standard.yoked    yoked -3.3026284  1.17314374 9.094801
##      Prkci     Wwc1    Grin1    Gria1    Pick1      Nsf     Fmr1   Camk2a
## 1 7.719338 8.301099 10.68066 11.52905 7.530504 11.49573 8.027732 13.21496
## 2 7.548660 8.215442 10.80562 11.42916 7.327508 11.42863 8.205092 12.63324
## 3 8.006286 8.350489 10.83083 11.63562 7.717724 11.41348 8.200184 12.89590
## 4 7.852252 8.451785 10.69996 11.75007 7.474781 11.61321 8.401654 13.45862
## 5 7.888498 8.410608 10.73685 11.67325 7.272327 11.62802 8.219559 13.01407
## 6 7.736955 8.438542 10.69857 11.75503 7.611018 11.72119 8.012141 13.18540
##         Fos     Fosl2     Npas4       Arc
## 1 11.273104 10.047064 11.707078 11.248244
## 2 10.859684  9.277070  9.895664  9.360658
## 3 10.440561  9.137341 10.057463  9.740559
## 4 11.515130 11.374861 12.733872 11.465318
## 5 11.765583 11.054132 12.631030 12.124175
## 6  8.213153  8.489472  8.594899  8.468157

vsdCA1 <- combinePCvsd("../data/03_CA3_vsdtraining.csv", "CA1", candidategenes)

## Joining, by = "ID"

##       ID subfield        treatment training       PC1         PC2    Prkcz
## 1 15143B      CA1   conflict.yoked    yoked -3.143663 -0.48834291       NA
## 2 15143C      CA1 standard.trained  trained  5.886039  0.33235063       NA
## 3 15143D      CA1   standard.yoked    yoked -2.753272 -0.07584809       NA
## 4 15144A      CA1 conflict.trained  trained  6.704182 -0.07853719 8.480993
## 5 15144B      CA1   conflict.yoked    yoked -2.347673 -0.02836475 9.020478
## 6 15144C      CA1 standard.trained  trained  7.049937 -1.78499206 9.330134
##      Prkci     Wwc1    Grin1    Gria1    Pick1      Nsf     Fmr1   Camk2a
## 1       NA       NA       NA       NA       NA       NA       NA       NA
## 2       NA       NA       NA       NA       NA       NA       NA       NA
## 3       NA       NA       NA       NA       NA       NA       NA       NA
## 4 7.257581 7.604937 10.42402 10.87217 7.515542 11.42011 7.561053 12.29037
## 5 7.789876 8.568090 10.77667 11.30591 7.449523 11.67896 7.789876 12.94293
## 6 7.491501 7.761925 10.40600 11.09883 7.283242 11.49317 7.952061 12.78681
##         Fos    Fosl2    Npas4      Arc
## 1        NA       NA       NA       NA
## 2        NA       NA       NA       NA
## 3        NA       NA       NA       NA
## 4  9.855929 7.976064 7.515542 9.208575
## 5 10.205795 7.525519 9.020478 9.821415
## 6 10.384651 7.824398 7.264073 9.413381

vsdCA3 <- combinePCvsd("../data/03_DG_vsdtraining.csv", "CA3", candidategenes)

## Joining, by = "ID"

##       ID subfield        treatment training        PC1         PC2    Prkcz
## 1 15143A      CA3 conflict.trained  trained -0.2275039  3.03543738 9.052238
## 2 15144A      CA3 conflict.trained  trained  6.7041815 -0.07853719 9.004395
## 3 15144B      CA3   conflict.yoked    yoked -2.3476730 -0.02836475       NA
## 4 15144C      CA3 standard.trained  trained  7.0499369 -1.78499206 8.903003
## 5 15144D      CA3   standard.yoked    yoked -3.3026284  1.17314374 9.094801
## 6 15145A      CA3 conflict.trained  trained  6.9392690  0.25866181 8.988535
##      Prkci     Wwc1    Grin1    Gria1    Pick1      Nsf     Fmr1   Camk2a
## 1 7.719338 8.301099 10.68066 11.52905 7.530504 11.49573 8.027732 13.21496
## 2 7.852252 8.451785 10.69996 11.75007 7.474781 11.61321 8.401654 13.45862
## 3       NA       NA       NA       NA       NA       NA       NA       NA
## 4 7.888498 8.410608 10.73685 11.67325 7.272327 11.62802 8.219559 13.01407
## 5 7.736955 8.438542 10.69857 11.75503 7.611018 11.72119 8.012141 13.18540
## 6 7.965716 8.296659 10.85650 11.69118 7.260001 11.63924 8.056599 12.98003
##         Fos     Fosl2     Npas4       Arc
## 1 11.273104 10.047064 11.707078 11.248244
## 2 11.515130 11.374861 12.733872 11.465318
## 3        NA        NA        NA        NA
## 4 11.765583 11.054132 12.631030 12.124175
## 5  8.213153  8.489472  8.594899  8.468157
## 6 10.314514  9.471430 11.342022 10.184857

# save only genes in all
DGgenes <- names(vsdDG)
CA3genes <- names(vsdCA3)
CA1genes <- names(vsdCA1)
savecols <- intersect(DGgenes, CA3genes)
savecols <- intersect(savecols, CA1genes)


vsdDG <- vsdDG %>% dplyr::select(one_of(savecols)) 
vsdCA1 <- vsdCA1 %>% dplyr::select(one_of(savecols)) 
vsdCA3 <- vsdCA3 %>% dplyr::select(one_of(savecols)) 

allvsd <- rbind(vsdDG, vsdCA1)
allvsd <- rbind(allvsd, vsdCA3)
head(allvsd)

##       ID subfield        treatment training        PC1         PC2    Prkcz
## 1 15143A       DG conflict.trained  trained -0.2275039  3.03543738 9.052238
## 2 15143B       DG   conflict.yoked    yoked -3.1436627 -0.48834291 8.922782
## 3 15143D       DG   standard.yoked    yoked -2.7532719 -0.07584809 9.060078
## 4 15144A       DG conflict.trained  trained  6.7041815 -0.07853719 9.004395
## 5 15144C       DG standard.trained  trained  7.0499369 -1.78499206 8.903003
## 6 15144D       DG   standard.yoked    yoked -3.3026284  1.17314374 9.094801
##      Prkci     Wwc1    Grin1    Gria1    Pick1      Nsf     Fmr1   Camk2a
## 1 7.719338 8.301099 10.68066 11.52905 7.530504 11.49573 8.027732 13.21496
## 2 7.548660 8.215442 10.80562 11.42916 7.327508 11.42863 8.205092 12.63324
## 3 8.006286 8.350489 10.83083 11.63562 7.717724 11.41348 8.200184 12.89590
## 4 7.852252 8.451785 10.69996 11.75007 7.474781 11.61321 8.401654 13.45862
## 5 7.888498 8.410608 10.73685 11.67325 7.272327 11.62802 8.219559 13.01407
## 6 7.736955 8.438542 10.69857 11.75503 7.611018 11.72119 8.012141 13.18540
##         Fos     Fosl2     Npas4       Arc
## 1 11.273104 10.047064 11.707078 11.248244
## 2 10.859684  9.277070  9.895664  9.360658
## 3 10.440561  9.137341 10.057463  9.740559
## 4 11.515130 11.374861 12.733872 11.465318
## 5 11.765583 11.054132 12.631030 12.124175
## 6  8.213153  8.489472  8.594899  8.468157

summary(lm( PC1 ~ Arc, data = vsdDG))

## 
## Call:
## lm(formula = PC1 ~ Arc, data = vsdDG)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.7195 -1.8538  0.1447  1.4708  5.6309 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -29.184      6.810  -4.286 0.001059 ** 
## Arc            2.994      0.660   4.536 0.000682 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.748 on 12 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.6316, Adjusted R-squared:  0.6009 
## F-statistic: 20.58 on 1 and 12 DF,  p-value: 0.0006823

summary(lm( PC2 ~ Arc, data = vsdDG))

## 
## Call:
## lm(formula = PC2 ~ Arc, data = vsdDG)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9226 -1.0067 -0.5923  0.5110  3.2901 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   9.8100     3.7627   2.607   0.0229 *
## Arc          -0.8948     0.3647  -2.454   0.0304 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.519 on 12 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.3341, Adjusted R-squared:  0.2786 
## F-statistic:  6.02 on 1 and 12 DF,  p-value: 0.03039

cor.test(vsdDG$PC1, vsdDG$Arc, method = c("pearson"))

## 
##  Pearson's product-moment correlation
## 
## data:  vsdDG$PC1 and vsdDG$Arc
## t = 4.5362, df = 12, p-value = 0.0006823
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4568067 0.9322321
## sample estimates:
##       cor 
## 0.7947587

cor.test(vsdDG$PC2, vsdDG$Arc, method = c("pearson"))

## 
##  Pearson's product-moment correlation
## 
## data:  vsdDG$PC2 and vsdDG$Arc
## t = -2.4536, df = 12, p-value = 0.03039
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.84839573 -0.06839018
## sample estimates:
##        cor 
## -0.5779963

iconDG <- png::readPNG("../figures/00_schematics/DG.png")
iconDG <-  grid::rasterGrob(iconDG, interpolate=TRUE)

vsdDG$training <- factor(vsdDG$training, levels = levelstraining)

a <- ggplot(vsdDG, aes(x = Arc, y = PC1)) +
   geom_point(aes( color = training)) + 
   geom_smooth(method = "lm", color = "grey") +
   scale_color_manual(values = allcolors) +
  theme_ms() +
   theme(legend.position = "bottom",
         axis.title.x = element_text(face = "italic"),
         legend.title = element_blank(), 
         legend.key.size = unit(0.25, "cm"))  +
  labs(subtitle = "r = 0.81, p = 0.0002") +
   annotation_custom(iconDG, ymin = 6, ymax = 11, xmin = 7.5, xmax = 9)
a

## `geom_smooth()` using formula 'y ~ x'

## Warning: Removed 2 rows containing non-finite values (stat_smooth).

## Warning: Removed 2 rows containing missing values (geom_point).