# PSGE_functions.R

################################################################################################################
# filter SNPs [filter_SNPs]
## 1) Filter SNPs that overlap > 2 genes
## 2) Filter SNPs overlapping 2 genes on the same strand
## 3) Filter SNPs within miRNA/tRNA genes
filter_SNPs <- function(SNP_gene_bed, filterlist, cores) {
  # Read a BED file of SNP-by-gene intersections and drop ambiguous records.
  # SNP_gene_bed = bed file of SNPs intersecting genes of the format:
  ## Chr	Pos	Pos	snp_#:gene	.	strand
  # filterlist = list of gene IDs to filter
  # cores = kept so existing calls still work; the same-strand check is now
  ## vectorised, so no parallel backend is registered and the value is unused
  # Returns a data.frame with columns chr, pos, SNP_gene, strand, SNP, geneID

  # Read in and wrangle data
  ## stringsAsFactors = FALSE keeps SNP_gene a character vector on R < 4.0,
  ## where strsplit() would otherwise fail on a factor
  SNPs <- read.table(SNP_gene_bed, sep = "\t", header = FALSE,
                     stringsAsFactors = FALSE)[, c(1, 2, 4, 6)]
  names(SNPs) <- c("chr", "pos", "SNP_gene", "strand")
  id_parts <- strsplit(as.character(SNPs$SNP_gene), split = ":", fixed = TRUE)
  SNPs$SNP <- vapply(id_parts, function(p) p[1], character(1))
  ## IDs without a ":" get NA as geneID
  SNPs$geneID <- vapply(id_parts, function(p) p[2], character(1))

  # Remove SNPs that overlap > 2 genes
  genes_per_SNP <- table(SNPs$SNP)
  SNPs <- SNPs[!SNPs$SNP %in% names(genes_per_SNP[genes_per_SNP > 2]), ]

  # Remove SNPs overlapping 2 genes on the same strand
  ## A SNP with exactly two records lies on one strand only when its second
  ## record repeats the (SNP, strand) pair of the first
  pair_SNPs <- names(genes_per_SNP[genes_per_SNP == 2])
  repeated_pair <- SNPs$SNP %in% pair_SNPs &
    duplicated(SNPs[, c("SNP", "strand")])
  SNPs <- SNPs[!SNPs$SNP %in% SNPs$SNP[repeated_pair], ]

  # Filter SNPs in miRNA/tRNA genes
  SNPs <- SNPs[!SNPs$geneID %in% filterlist, ]

  # Return
  SNPs
}
################################################################################################################


################################################################################################################
# Combine coverage files to single matrix [make_ASE_counts_matrix]
make_ASE_counts_matrix <- function(DIR_COUNTS, metadata, cores, SNPs = NULL) {
  # Combine per-sample coverage files into one SNP x sample count table.
  # DIR_COUNTS = directory containing coverage files in the format:
  ## Chr	Pos	Pos	snp_#:gene .	strand	coverage
  # metadata = dataframe containing metadata information in the format:
  ## sample.id  parent  phenotype individual  lineage block
  # cores = kept so existing calls still work; duplicate removal is now
  ## vectorised, so no parallel backend is registered and the value is unused
  # SNPs = optional data.frame with a SNP_gene column (e.g. from filter_SNPs).
  ## When NULL, the object named `SNPs` visible from the environment this
  ## function was defined in is used, which is what the function always did
  # Returns a data.frame with one row per SNP:gene ID, one column per sample
  ## and a final `gene` column

  if (is.null(SNPs)) {
    SNPs <- get("SNPs", envir = parent.env(environment()))
  }
  snp_ids <- as.character(SNPs$SNP_gene)

  # Read in data and wrangle
  ## List files ending in .txt in DIR_COUNTS (`pattern` is a regular
  ## expression, not a shell glob)
  SNP_counts <- data.frame(matrix(ncol = 0, nrow = length(snp_ids)))
  files.counts <- list.files(path = DIR_COUNTS, pattern = "\\.txt$",
                             full.names = TRUE, recursive = FALSE)
  ## Add one coverage column per sample listed in metadata, matched by
  ## SNP:gene ID
  for (count_file in files.counts) {
    ### Sample name = file name up to its first ".", whatever the directory
    sample_name <- sub("[.].*$", "", basename(count_file))
    if (!sample_name %in% metadata$sample.id) next
    message("Reading ", count_file)
    coverage <- read.table(count_file, header = FALSE,
                           stringsAsFactors = FALSE)[, c(4, 7)]
    coverage_ids <- as.character(coverage[[1]])
    if (anyDuplicated(coverage_ids) > 0) {
      warning("Duplicated SNP:gene IDs in ", count_file,
              "; using the first record for each", call. = FALSE)
    }
    sample_counts <- coverage[[2]][match(snp_ids, coverage_ids)]
    ### SNPs absent from this sample's file count as zero coverage
    sample_counts[is.na(sample_counts)] <- 0
    SNP_counts[[sample_name]] <- sample_counts
  }
  row.names(SNP_counts) <- snp_ids

  # Remove genes with duplicate rows
  ## The gene column takes part in the comparison, so a row is dropped only
  ## when an earlier row of the same gene has identical counts in every sample
  SNP_counts$gene <- vapply(strsplit(snp_ids, split = ":", fixed = TRUE),
                            function(p) p[2], character(1))
  SNP_counts[!duplicated(SNP_counts), , drop = FALSE]
}
################################################################################################################


################################################################################################################
# Calculate library size factors [calcSizeFactors]
## 1) Merge allelic counts by library
## 2) Estimate library size factors using the median of ratios normalization method from DESeq2
calcSizeFactors <- function(counts, mdata) {
  # Estimate one size factor per library (individual) and return it once for
  # every allele-specific sample column of `counts`.
  # counts = counts matrix generated by make_ASE_counts_matrix EXCEPT last column
  # mdata = dataframe containing metadata information in the format:
  # sample.id  parent  phenotype individual  lineage block
  # Returns a numeric vector named by sample.id, in the column order of the
  ## metadata-filtered counts

  # Merge allelic counts by library
  ## drop = FALSE keeps a data.frame even when only one column is selected,
  ## which rowSums() requires
  counts <- counts[, names(counts) %in% mdata$sample.id, drop = FALSE]
  individuals <- unique(as.character(mdata$individual))
  merged <- lapply(individuals, function(ind) {
    ids <- mdata$sample.id[as.character(mdata$individual) == ind]
    rowSums(counts[, names(counts) %in% ids, drop = FALSE])
  })
  counts_merged <- data.frame(do.call(cbind, merged), check.names = FALSE)
  names(counts_merged) <- individuals
  ## One colData row per library; columns are picked by name rather than by
  ## position
  mdata.merged <- mdata[, c("individual", "phenotype", "lineage")]
  mdata.merged <- mdata.merged[!duplicated(mdata.merged), ]

  # Estimate library size factors using the median of ratios normalization method from DESeq2
  dds <- DESeqDataSetFromMatrix(countData = counts_merged, colData = mdata.merged,
                                design = ~ lineage + phenotype)
  dds <- estimateSizeFactors(dds)
  library_sf <- sizeFactors(dds)

  # Give every sample column the size factor of the library it belongs to
  sample_library <- as.character(
    mdata$individual[match(names(counts), mdata$sample.id)])
  sFs <- as.numeric(library_sf[match(sample_library, names(library_sf))])
  names(sFs) <- names(counts)
  sFs
}
################################################################################################################


################################################################################################################
# Normalize counts by library size [normalizeASReadCounts]
## 1) Keep only the count columns whose sample ID is listed in the metadata
## 2) Attach the library size factors supplied by the caller (see calcSizeFactors)
## 3) Normalize counts for each library by allele
normalizeASReadCounts <- function(counts, mdata, size_factors) {
  # Divide allele-specific counts by caller-supplied size factors via DESeq2.
  # counts = counts matrix generated by make_ASE_counts_matrix EXCEPT last column
  # mdata = dataframe containing metadata information in the format:
  ## sample.id  parent  phenotype individual  lineage block
  # size_factors = object from calcSizeFactors
  # Returns a data.frame of normalized counts

  # Keep only the sample columns described in the metadata
  in_metadata <- names(counts) %in% mdata$sample.id
  allele_counts <- counts[, in_metadata]

  # Build the DESeq2 object and attach the precomputed size factors
  as.dds <- DESeqDataSetFromMatrix(countData = allele_counts,
                                   colData = mdata,
                                   design = ~ lineage + phenotype)
  sizeFactors(as.dds) <- size_factors

  # Normalize counts for each library by allele
  data.frame(counts(as.dds, normalized = TRUE))
}
################################################################################################################


################################################################################################################
# Filter low-count SNPs from count matrix [filter_counts]
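## 1) Remove rows whose summed counts in either cross (lineage A or lineage B) do not exceed the low count threshold (lcf)
## 2) Flag rows by total read count: SKrow is TRUE when the row total is < 10000 and FALSE otherwise
### (PSGE.SK runs the Storer-Kim test on SKrow == TRUE rows and fisher.test on the rest;
### the split exists because the exact binomial test cannot handle counts > 10000)
## 3) Remove genes with < 2 distinct SNP rows after steps 1 and 2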
filter_counts <- function(counts, metadata, lcf) {
  # Drop low-count SNP rows, flag rows by total depth and drop genes left with
  # fewer than two distinct SNP rows.
  # counts = phenotype-specific counts matrix
  # metadata = dataframe containing metadata information in the format:
  ## sample.id  parent  phenotype individual  lineage block
  # lcf = low count filter threshold (integer)
  # Returns the filtered counts with an added logical column SKrow

  # Keep rows whose summed counts exceed lcf in lineage A and in lineage B
  ## drop = FALSE keeps a data.frame when a lineage has a single sample
  LA <- metadata$sample.id[metadata$lineage %in% "A"]
  LB <- metadata$sample.id[metadata$lineage %in% "B"]
  counts <- counts[rowSums(counts[, names(counts) %in% LA, drop = FALSE]) > lcf,
                   , drop = FALSE]
  counts <- counts[rowSums(counts[, names(counts) %in% LB, drop = FALSE]) > lcf,
                   , drop = FALSE]

  # Flag rows by total count: SKrow is TRUE when the row total is < 10000
  counts$SKrow <- unname(rowSums(counts) < 10000)

  # Remove genes with < 2 distinct SNP rows
  ## Rows are compared with the gene column included, so only rows of the
  ## same gene with identical counts collapse into one
  counts$gene <- vapply(strsplit(row.names(counts), split = ":", fixed = TRUE),
                        function(p) p[2], character(1))
  distinct_rows <- !duplicated(counts)
  rows_per_gene <- table(counts$gene[distinct_rows])
  keep_genes <- names(rows_per_gene)[rows_per_gene >= 2]
  counts <- counts[counts$gene %in% keep_genes, , drop = FALSE]
  counts$gene <- NULL

  # Return filtered counts
  counts
}
################################################################################################################


################################################################################################################
# Storer-Kim test [twobinom]
## (Function from the WRS2 package). Test the hypothesis that two independent 
### binomials have equal probability of success using the Storer--Kim method.
## Modified for efficiency: changed outer() command to Rfast::Outer()
twobinom<-function(r1,n1,r2,n2,alpha=.05){
  # Test whether two independent binomial samples share the same success
  # probability and return the p-value with the two observed proportions.
  # r1 = success in group 1
  # n1 = total in group 1
  # r2 = success in group 2
  # n2 = total in group 2
  # alpha = accepted for interface compatibility; not used anywhere in the body
  # Returns list(p.value, p1, p2, est.dif) where p1 = r1/n1, p2 = r2/n2 and
  ## est.dif = p1 - p2
  # NOTE(review): Outer is not defined in this file; the header comment says it
  ## is Rfast::Outer, so Rfast must be attached by the caller
  n1p<-n1+1
  n2p<-n2+1
  # n1m and n2m are computed but never read below
  n1m<-n1-1
  n2m<-n2-1
  # Observed proportions; an undefined ratio (e.g. 0/0) is replaced by 0
  q <- r1/n1
  p <- r2/n2
  if(is.na(q)){q <- 0}
  if(is.na(p)){p <- 0}
  # Observed absolute difference between the two proportions
  chk<-abs(q-p)
  # Every proportion each group could have produced: 0/n, 1/n, ..., n/n
  x<-c(0:n1)/n1
  y<-c(0:n2)/n2  
  # Pooled success probability across both groups
  phat<-(r1+r2)/(n1+n2)
  # Differences between every pair of possible proportions
  ## (the t() presumably restores base outer()'s orientation - confirm against Rfast)
  m1<-t(Outer(x,y,"-"))
  m2<-matrix(1,n1p,n2p)
  # flag marks the outcome pairs whose difference is at least as large as observed
  flag<-(abs(m1)>=chk)
  # m3 is 1 where flag is TRUE and 0 elsewhere
  m3<-m2*flag
  rm(m1,m2,flag)
  xv<-c(1:n1)
  yv<-c(1:n2)
  xv1<-n1-xv+1
  yv1<-n2-yv+1
  # dis1/dis2 start at 1 and continue with pbeta(phat, k, n-k+1) for k = 1..n;
  ## by the beta/binomial identity these are the upper-tail probabilities
  ## P(X >= k) of a binomial(n, phat) count, for integer n
  dis1<-c(1,pbeta(phat,xv,xv1))
  dis2<-c(1,pbeta(phat,yv,yv1))
  pd1<-NA
  pd2<-NA
  # Successive differences of the tail probabilities: pd[i] = P(X = i-1)
  for(i in 1:n1){pd1[i]<-dis1[i]-dis1[i+1]}
  for(i in 1:n2){pd2[i]<-dis2[i]-dis2[i+1]}
  # Last entry is the probability that every trial succeeds
  pd1[n1p]<-phat^n1
  pd2[n2p]<-phat^n2
  # Products of the two point-probability vectors for every outcome pair
  m4<-t(Outer(pd1,pd2,"*"))
  # p-value = total probability of the flagged outcome pairs
  test<-sum(m3*m4)
  rm(m3,m4)
  list(p.value=test,p1=q,p2=p,est.dif=q-p)
}
################################################################################################################


################################################################################################################
## Wrapper for Storer-Kim test [PSGE.SK]
## At each SNP, conduct a Storer-Kim binomial exact test of two proportions to 
### test whether there is a statistically significant difference in the proportion 
### of maternal vs paternal read counts
PSGE.SK <- function(counts, metadata, phenotype, cores) {
  # For every SNP row, test whether paternal and maternal read totals differ
  # and return one p-value per row.
  # counts = phenotype-specific counts matrix (must contain the SKrow column
  ## added by filter_counts)
  # metadata = dataframe containing metadata information in the format:
  ## sample.id  parent  phenotype individual  lineage block
  # phenotype = one of "unresponsive" or "responsive"
  # cores = # of threads for multithreaded search
  # Returns a data.frame with columns SNP_gene and p, in the row order of counts

  SKrows <- counts$SKrow
  if (is.null(SKrows)) {
    stop("counts must contain the SKrow column added by filter_counts",
         call. = FALSE)
  }

  # Split data by pat and mat
  ## drop = FALSE keeps a data.frame when a group holds a single sample
  in_phenotype <- metadata$phenotype == phenotype
  pat_ids <- as.character(
    metadata$sample.id[which(metadata$parent %in% c("D") & in_phenotype)])
  mat_ids <- as.character(
    metadata$sample.id[which(metadata$parent %in% c("Q") & in_phenotype)])
  pat.exp <- counts[, pat_ids, drop = FALSE]
  mat.exp <- counts[, mat_ids, drop = FALSE]

  # Per-SNP read totals, computed once instead of inside every iteration
  snp_ids <- row.names(pat.exp)
  pat_total <- unname(rowSums(pat.exp))
  mat_total <- unname(rowSums(mat.exp))

  # Nothing to test: return an empty result with the usual columns
  i.len <- length(snp_ids)
  if (i.len == 0) {
    return(data.frame(SNP_gene = character(0), p = numeric(0),
                      stringsAsFactors = FALSE))
  }

  # Set up for DoParallel
  registerDoParallel(cores = cores)

  # For each row, conduct an SK test and return the p-value
  ## .export ships the whole global environment so that twobinom is available
  ## to the workers, as before
  return.df <- foreach(i = seq_len(i.len), .combine = rbind,
                       .export = ls(globalenv()),
                       .packages = "Rfast") %dopar% {
    p1.s <- pat_total[i]
    p2.s <- mat_total[i]
    p.o <- p1.s + p2.s
    if (SKrows[i]) {
      test <- twobinom(r1 = p1.s, n1 = p.o, r2 = p2.s, n2 = p.o)$p.value
    } else {
      test <- fisher.test(matrix(c(p1.s, p2.s, p2.s, p1.s), ncol = 2))$p.value
    }
    data.frame(SNP_gene = snp_ids[i], p = test)
  }
  return.df <- return.df[match(snp_ids, return.df$SNP_gene), ]

  # Return
  return.df
}
################################################################################################################


################################################################################################################
# General linear mixed effects model with interactions [PSGE.GLIMMIX]
## For each gene, fit a model to determine whether there is 
### a statistically significant effect of parent, lineage, or their interaction 
### on read counts at all SNP positions across the transcript
PSGE.GLIMMIX <- function(counts, metadata, size_factors, cores) {
  # Fit one mixed model per gene and return the p-values of the parent,
  # lineage and parent x lineage terms.
  # counts = phenotype-specific counts matrix
  # metadata = dataframe containing metadata information in the format:
  ## sample.id  parent  phenotype individual  lineage block
  # size_factors = object from calcSizeFactors
  # cores = # of threads for multithreaded search
  # Returns a data.frame with columns ID, parent.p, cross.p, parentXcross.p.
  ## A gene whose model cannot be fitted, or whose fit has fewer than four
  ## fixed-effect coefficients, gets p = 1 for all three terms

  # Long table of size factors, built directly so sample IDs are not altered
  # by data.frame()'s column-name checking
  used_sf <- size_factors[names(size_factors) %in% names(counts)]
  sizeFactors <- data.frame(sample.id = names(used_sf),
                            sizeFactor = as.numeric(used_sf),
                            stringsAsFactors = FALSE)

  counts$SNP_gene <- row.names(counts)
  counts$geneID <- vapply(strsplit(counts$SNP_gene, split = ":", fixed = TRUE),
                          function(p) p[2], character(1))
  genelist <- unique(counts$geneID)
  registerDoParallel(cores = cores)

  df.out <- foreach(i = seq_along(genelist), .combine = rbind) %dopar% {
    # Long format: one row per SNP x sample, joined to metadata and size factor
    counts.sub <- counts[counts$geneID == genelist[i], ]
    counts.sub$geneID <- NULL
    counts.sub <- gather(counts.sub, sample.id, count,
                         names(counts.sub), -SNP_gene, factor_key = TRUE)
    counts.sub <- join(counts.sub, metadata, by = "sample.id")
    counts.sub <- join(counts.sub, sizeFactors, by = "sample.id")
    counts.sub$parent <- as.factor(str_sub(counts.sub$parent, -1, -1))
    counts.sub$SNP_gene <- as.factor(counts.sub$SNP_gene)
    counts.sub$lineage <- as.factor(counts.sub$lineage)
    counts.sub$individual <- as.factor(counts.sub$individual)

    # The handler's return value (NULL) marks a failed fit; tryCatchLog still
    # logs the error
    fit <- tryCatchLog(
      lmer(count ~ parent + lineage + parent * lineage + (1 | SNP_gene) +
             (1 | individual) + offset(log(sizeFactor)), data = counts.sub),
      error = function(e) NULL)

    parent.p.list <- 1
    cross.p.list <- 1
    parent.cross.p.list <- 1
    if (!is.null(fit)) {
      coefs <- summary(fit)[["coefficients"]]
      # Rows 2-4 are read as parent, lineage and interaction. With fewer than
      # four rows a term was dropped, so keep the p = 1 fallback instead of
      # aborting every gene with an out-of-bounds subscript
      if (nrow(coefs) >= 4) {
        parent.p.list <- coefs[2, 5]
        cross.p.list <- coefs[3, 5]
        parent.cross.p.list <- coefs[4, 5]
      }
    }
    data.frame(ID = genelist[i],
               parent.p = parent.p.list,
               cross.p = cross.p.list,
               parentXcross.p = parent.cross.p.list)
  }
  df.out
}
################################################################################################################


################################################################################################################
# Assess test results for each gene [PSGE.analysis]
## 1) Split count matrices by cross and parent of origin for plotting
## 2) Set up a data.frame to plot %p1 and %p2 for each SNP
## 3) Join results of Storer-Kim tests
## 4) Join results of GLIMMIX models
## 5) Correct for multiple testing
## 6) For each gene, check whether all SNPs are biased in the same direction at established thresholds
## 7) Genes with parentXcross effects are flagged as unbiased
PSGE.analysis <- function(counts, phenotype, metadata, SK, GLIMMIX) {
  # Combine per-SNP Storer-Kim results with per-gene model results and assign
  # a bias label to every SNP and gene.
  # counts = phenotype-specific count matrix
  # phenotype = one of "unresponsive" or "responsive"
  # metadata = dataframe containing metadata information in the format:
  ## sample.id  parent  phenotype individual  lineage block
  # SK = object from PSGE.SK
  # GLIMMIX = object from PSGE.GLIMMIX (gene IDs in its first column)
  # Returns a data.frame with columns p1, p2, SK.p, SNP_gene, gene, SK.padj,
  ## xbias (SNP-level call), bias (gene-level call) and bias.plot (factor),
  ## with the "NA" rows first

  # Split counts by cross and parent of origin
  counts <- counts[, names(counts) %in% metadata$sample.id, drop = FALSE]
  ## Per-SNP read total for one parent / lineage combination; drop = FALSE
  ## keeps a data.frame when the group holds a single sample
  group_total <- function(parent, lineage) {
    use <- which(metadata$parent %in% parent &
                   metadata$lineage == lineage &
                   metadata$phenotype == phenotype)
    rowSums(counts[, as.character(metadata$sample.id[use]), drop = FALSE])
  }
  pat_B <- group_total("D", "B")
  mat_B <- group_total("Q", "B")
  pat_A <- group_total("D", "A")
  mat_A <- group_total("Q", "A")

  # p1 = paternal share in lineage B, p2 = maternal share in lineage A;
  # SNPs with no reads in a cross (0/0) are set to 0
  p1 <- pat_B / (mat_B + pat_B)
  p1[is.nan(p1)] <- 0
  p2 <- mat_A / (mat_A + pat_A)
  p2[is.nan(p2)] <- 0

  # Join results of Storer-Kim tests
  ## p-values are looked up by SNP:gene ID, so the row order of SK is irrelevant
  snp_ids <- row.names(counts)
  tested <- snp_ids %in% SK$SNP_gene
  res <- data.frame(p1 = unname(p1[tested]), p2 = unname(p2[tested]),
                    stringsAsFactors = FALSE)
  res$SK.p <- SK$p[match(snp_ids[tested], SK$SNP_gene)]
  res$SNP_gene <- snp_ids[tested]
  res$gene <- vapply(strsplit(res$SNP_gene, split = ":", fixed = TRUE),
                     function(p) p[2], character(1))

  # Correct for multiple testing
  res$SK.padj <- p.adjust(res$SK.p, "BH")
  parent.padj <- p.adjust(GLIMMIX$parent.p, "BH")
  cross.padj <- p.adjust(GLIMMIX$cross.p, "BH")
  parentXcross.padj <- p.adjust(GLIMMIX$parentXcross.p, "BH")
  ## Genes with a parent or lineage effect, minus genes with a parentXcross
  ## effect (those are flagged as unbiased)
  gene_ids <- GLIMMIX[[1]]
  GLIMMIX.biased <- setdiff(
    gene_ids[which(parent.padj < 0.05 | cross.padj < 0.05)],
    gene_ids[which(parentXcross.padj < 0.05)])

  # Direction of each SNP at the established thresholds (significance aside)
  n_snps <- nrow(res)
  hi1 <- res$p1 > 0.6
  lo1 <- res$p1 < 0.4
  hi2 <- res$p2 > 0.6
  lo2 <- res$p2 < 0.4
  direction <- rep("NA", n_snps)
  direction[which(hi1 & lo2)] <- "pat"
  direction[which(lo1 & hi2)] <- "mat"
  direction[which(lo1 & lo2)] <- "Cross B"
  direction[which(hi1 & hi2)] <- "Cross A"

  # SNP-level call: the direction, kept only for significant SNPs
  xbias <- direction
  xbias[!((res$SK.padj < 0.05) %in% TRUE)] <- "NA"

  # Gene-level call: the single direction shared by all significant SNPs of
  # the gene; "NA" when none is significant or when directions disagree
  gene_call <- vapply(split(xbias, res$gene), function(labels) {
    calls <- setdiff(labels, "NA")
    if (length(calls) == 1L) calls else "NA"
  }, character(1))
  bias <- unname(gene_call[res$gene])
  bias[is.na(bias)] <- "NA"

  # Plot label: the gene-level call, kept for SNPs whose own direction agrees
  # with it and whose gene is supported by the GLIMMIX models
  bias.plot <- rep("NA", n_snps)
  agrees <- bias == direction
  bias.plot[agrees] <- bias[agrees]
  bias.plot[!res$gene %in% GLIMMIX.biased] <- "NA"

  res$xbias <- xbias
  res$bias <- bias
  res$bias.plot <- bias.plot

  # "NA" rows first, labelled rows after them
  labelled <- res$bias.plot != "NA"
  res <- res[c(which(!labelled), which(labelled)), ]
  res$bias.plot <- factor(res$bias.plot,
                          levels = c("NA", "mat", "Cross A", "Cross B", "pat"))

  # Return
  res
}
################################################################################################################


################################################################################################################
# Collapse SNPs to calculate p1 & p2 by transcript [PSGE.collapse.avgExp]
PSGE.collapse.avgExp <- function(data.plot, data.counts, metadata, phenotype) {
  # Collapse SNP-level results to one p1 / p2 pair and one bias label per gene.
  # data.plot = object from PSGE.analysis
  # data.counts = phenotype-specific counts matrix
  # metadata = dataframe containing metadata information in the format:
  ## sample.id  parent  phenotype individual  lineage block
  # phenotype = one of "unresponsive" or "responsive"
  # Returns a data.frame with columns gene, bias.plot (factor), p1, p2,
  ## ordered by bias.plot level

  # Wrangle data
  ## Counts are lined up with data.plot by SNP:gene ID; SNPs missing from
  ## data.counts get NA counts
  data.counts <- data.counts[, names(data.counts) %in% metadata$sample.id,
                             drop = FALSE]
  snp_counts <- data.counts[match(data.plot$SNP_gene, row.names(data.counts)),
                            , drop = FALSE]

  # Per-SNP read total for one parent / lineage combination
  group_total <- function(parent, lineage) {
    use <- which(metadata$parent %in% parent &
                   metadata$lineage == lineage &
                   metadata$phenotype == phenotype)
    rowSums(snp_counts[, as.character(metadata$sample.id[use]), drop = FALSE])
  }
  pat_B <- group_total("D", "B")
  mat_B <- group_total("Q", "B")
  pat_A <- group_total("D", "A")
  mat_A <- group_total("Q", "A")

  # Share of `numerator` reads among all reads pooled over the given SNPs
  pooled_share <- function(numerator, other) {
    share <- sum(numerator) / (sum(other) + sum(numerator))
    if (is.nan(share)) share <- 0
    if (is.infinite(share)) share <- 1
    share
  }

  gene_bias <- as.character(data.plot$bias)
  snp_label <- as.character(data.plot$bias.plot)
  genes <- unique(data.plot$gene)
  genes <- genes[!is.na(genes)]
  rows_by_gene <- split(seq_len(nrow(data.plot)),
                        factor(data.plot$gene, levels = genes))

  # Collapse SNPs by gene
  ## A gene with a gene-level call and at least one labelled SNP is summarised
  ## from the pooled reads of its labelled SNPs; every other gene gets the
  ## mean of its SNP-level p1 / p2 and the label "NA"
  per_gene <- lapply(rows_by_gene, function(rows) {
    labelled <- rows[snp_label[rows] != "NA"]
    if (!any(gene_bias[rows] == "NA") && length(labelled) > 0) {
      list(bias = snp_label[labelled[1]],
           p1 = pooled_share(pat_B[labelled], mat_B[labelled]),
           p2 = pooled_share(mat_A[labelled], pat_A[labelled]))
    } else {
      list(bias = "NA",
           p1 = mean(data.plot$p1[rows]),
           p2 = mean(data.plot$p2[rows]))
    }
  })

  return.data <- data.frame(
    gene = genes,
    bias.plot = unname(vapply(per_gene, function(g) g$bias, character(1))),
    p1 = unname(vapply(per_gene, function(g) as.numeric(g$p1), numeric(1))),
    p2 = unname(vapply(per_gene, function(g) as.numeric(g$p2), numeric(1))),
    stringsAsFactors = FALSE)
  return.data$bias.plot <- factor(return.data$bias.plot,
                                  levels = c("NA", "mat", "Cross A", "Cross B", "pat"))
  return.data <- return.data[order(return.data$bias.plot), ]

  # Return
  return.data
}
################################################################################################################


################################################################################################################
# Generate PSGE plot, averaging p1 & p2 by transcript [`PSGE.plot.tx`]
PSGE.plot.tx <- function(data, title) {
  # Scatter plot of p1 against p2 per transcript, coloured by bias category and
  # shaded by local point density within each category.
  # data = object from PSGE.collapse.avgExp
  # title = string, title of plot
  # Returns a ggplot object

  # 2D kernel density evaluated at each input point
  get_density <- function(x, y, ...) {
    dens <- MASS::kde2d(x, y, ...)
    ix <- findInterval(x, dens$x)
    iy <- findInterval(y, dens$y)
    dens$z[cbind(ix, iy)]
  }

  data$color <- NA
  for (level in levels(data$bias.plot)) {
    in_level <- which(data$bias.plot == level)
    # A category without genes has nothing to colour (and kde2d would fail)
    if (length(in_level) == 0) next

    # Light-to-dark colour ramp of the category
    ramp <- switch(level,
                   "NA" = c("grey90", "grey70"),
                   "pat" = c("#a1d2ed", "#02a0f5"),
                   "mat" = c("grey50", "black"),
                   "Cross A" = c("#4dc4a2", "#058762"),
                   "Cross B" = c("#f7ca60", "#f7af05"),
                   stop("Unknown bias level: ", level, call. = FALSE))
    pal <- colorRampPalette(colors = ramp)(60)

    # kde2d cannot estimate a bandwidth for a single point or for points that
    # share a coordinate; in that case use the end colour of the ramp for the
    # whole category instead of aborting the plot
    density <- tryCatch(
      get_density(data$p1[in_level], data$p2[in_level], n = 100),
      error = function(e) {
        warning("PSGE.plot.tx: density estimation failed for bias level '",
                level, "' (", conditionMessage(e),
                "); using a single colour", call. = FALSE)
        NULL
      })
    data[in_level, "color"] <- if (is.null(density)) {
      pal[length(pal)]
    } else {
      smoothPalette(density, pal = pal)
    }
  }
  data <- data[, c("p1", "p2", "color")]
  data <- data[!duplicated(data), ]

  # Generate plot
  g <- ggplot(data, aes(x = p1, y = p2,
                        color = color)) +
    geom_point(size = 3) + theme_classic() +
    xlab(expression(paste("% A allele in ", B[mother],
                          " x ", A[father], sep = ""))) +
    ylab(expression(paste("% A allele in ", A[mother],
                          " x ", B[father], sep = ""))) +
    ggtitle(title) +
    theme(text = element_text(size = 18),
          plot.title = element_text(hjust = 0.5)) +
    guides(alpha = "none", color = "none") +
    scale_x_continuous(limits = c(0, 1), breaks = seq(0, 1, .2)) +
    scale_y_continuous(limits = c(0, 1), breaks = seq(0, 1, .2)) +
    scale_colour_identity()

  # Return
  g
}
################################################################################################################


################################################################################################################
# Make center tri-plot [triplot.plot]
## 1) Conduct chi-squared tests for each category of bias, comparing unresponsive to responsive
## 2) Create table plot
triplot.plot <- function(unresponsive.plot, responsive.plot, label.A, label.B, allgenes) {
  # Build the centre table of the tri-plot: number of biased genes per category
  # in each phenotype, plus a chi-squared p-value comparing the two.
  # unresponsive.plot = object from PSGE.collapse.avgExp
  # responsive.plot = object from PSGE.collapse.avgExp
  # label.A = string, label for column 1, row 1
  # label.B = string, label for column 3, row 1
  # allgenes = list containing background set of gene names for chi-squared tests
  # Returns a table grob (gridExtra::tableGrob)

  bias_levels <- c("mat", "Cross A", "Cross B", "pat")
  n_background <- length(allgenes)

  # Combine number of genes in each category of bias into table
  count_genes <- function(tbl) {
    vapply(bias_levels, function(b) {
      length(unique(tbl$gene[which(tbl$bias.plot == b)]))
    }, integer(1), USE.NAMES = FALSE)
  }
  n_unresponsive <- count_genes(unresponsive.plot)
  n_responsive <- count_genes(responsive.plot)

  # Test if number of unresponsive biased genes is different from responsive biased genes
  ## One 2 x 2 test per category: rows = phenotype, columns = biased / not biased
  p_values <- vapply(seq_along(bias_levels), function(k) {
    chisq.test(matrix(c(n_unresponsive[k], n_responsive[k],
                        n_background - n_unresponsive[k],
                        n_background - n_responsive[k]),
                      ncol = 2))$p.value
  }, numeric(1))

  # Build table plot
  ## p-values above 0.05 are shown as "(ns)". An undefined p-value (no gene of
  ## the category in either phenotype) is also shown as "(ns)"; indexing rows
  ## with it would otherwise append a spurious row to the table
  p_labels <- formatC(p_values, format = "e", digits = 2)
  p_labels[is.na(p_values) | p_values > 0.05] <- "(ns)"
  gmid.df <- data.frame(Unresponsive = n_unresponsive,
                        Bias = c("mat", "cross A", "cross B", "pat"),
                        Responsive = n_responsive,
                        p = p_labels,
                        stringsAsFactors = FALSE)
  names(gmid.df) <- c(label.A, "Bias", label.B, ".")

  # Text colour: the Bias column uses one colour per category
  cols <- matrix("black", nrow(gmid.df), ncol(gmid.df))
  cols[, 2] <- c("#000000", "#009e73", "#e69f00", "#56b4e9")

  # Cell fill: tinted count columns, darker Bias column, white p-value column
  ccols <- matrix("white", nrow(gmid.df), ncol(gmid.df))
  ccols[, c(1, 3)] <- "#f4efea"
  ccols[, 2] <- "#e4d8d1"

  # Font face: bold Bias column
  cfonts <- matrix("plain", nrow(gmid.df), ncol(gmid.df))
  cfonts[, 2] <- "bold"

  tt <- ttheme_default(
    core = list(fg_params = list(col = cols, cex = 1, fontface = cfonts),
                bg_params = list(col = NA, fill = ccols),
                padding.h = unit(2, "mm")),
    rowhead = list(bg_params = list(col = NA)),
    colhead = list(
      bg_params = list(fill = c("#f4efea", "#e4d8d1", "#f4efea", "white")),
      fg_params = list(rot = 90, cex = 1,
                       col = c("black", "black", "black", "white"))))

  # Generate plot
  gmid <- tableGrob(gmid.df, rows = NULL, theme = tt)

  # Return
  gmid
}
################################################################################################################


################################################################################################################
# Export PSGE results [export_PSGE_results]
export_PSGE_results <- function(unresponsive.plot, responsive.plot, filename) {
  # Write the maternally and paternally biased genes of both phenotypes to a
  # CSV file with columns gene, bias, phenotype.
  # unresponsive.plot = object from PSGE.collapse.avgExp
  # responsive.plot = object from PSGE.collapse.avgExp
  # filename = string, file name to save gene lists to

  # Unique genes of one table carrying the requested bias label
  genes_with <- function(tbl, label) {
    unique(tbl[tbl$bias.plot == label, "gene"])
  }

  # Output order: unresponsive mat, responsive mat, unresponsive pat,
  # responsive pat
  gene_sets <- list(genes_with(unresponsive.plot, "mat"),
                    genes_with(responsive.plot, "mat"),
                    genes_with(unresponsive.plot, "pat"),
                    genes_with(responsive.plot, "pat"))
  set_sizes <- vapply(gene_sets, length, integer(1))

  # Repeat each label once per gene of its set
  export <- data.frame(
    gene = do.call(c, gene_sets),
    bias = rep(c("mat", "mat", "pat", "pat"), times = set_sizes),
    phenotype = rep(c("unresponsive", "responsive",
                      "unresponsive", "responsive"), times = set_sizes))
  write.csv(export, filename, row.names = F)
}
################################################################################################################