refactor(PAB): remove ambiguous edgeR's functions calls

hdescobarh · May 28, 2024 · 59e563b · 59e563b
1 parent f80a12b
commit 59e563b
Showing 1 changed file with 52 additions and 30 deletions.
diff --git a/PAB_Lab13_diff_expr/Code/do_DE_analysis.r b/PAB_Lab13_diff_expr/Code/do_DE_analysis.r
@@ -25,27 +25,6 @@ if (!dir.exists(output_directory_path)) {
   dir.create(output_directory_path)
 }
 
-# Libraries validation
-
-missing_packages <- character()
-for (package in c(
-  "edgeR"
-)) {
-  if (!require(package, quietly = TRUE, character.only = TRUE)) {
-    missing_packages <- append(missing_packages, package)
-  }
-}
-
-if (length(missing_packages) > 0) {
-  stop(
-    sprintf(
-      "Missing the following packages: %s",
-      paste(missing_packages, collapse = ", ")
-    ),
-    call. = FALSE
-  )
-}
-
 
 ########################## LOAD AND VALIDATE DATA ##########################
 
@@ -129,16 +108,19 @@ normalization_and_dispersion <- function(
     non_normalized_data, normalization_method, design) {
   # Calculate scaling factors for transforming
   #   library sizes to effective library sizes
-  data_normalized <- normLibSizes(
+  data_normalized <- edgeR::normLibSizes(
     non_normalized_data,
     method = normalization_method
   )
 
-  plotMDS(data_normalized, col = as.numeric(data_normalized$samples$treat))
+  edgeR::plotMDS(
+    data_normalized,
+    col = as.numeric(data_normalized$samples$treat)
+  )
   # Estimates Common, Trended and Tagwise NB dispersions
-  data_normalized <- estimateDisp(data_normalized, design)
-  plotBCV(data_normalized)
-  plotMeanVar(data_normalized, show.tagwise.vars = TRUE, NBline = TRUE)
+  data_normalized <- edgeR::estimateDisp(data_normalized, design)
+  edgeR::plotBCV(data_normalized)
+  edgeR::plotMeanVar(data_normalized, show.tagwise.vars = TRUE, NBline = TRUE)
   data_normalized
 }
 
@@ -151,18 +133,58 @@ model_fit <- function(data_normalized, contrasts) {
   # The QL F-test's statistic reflects the uncertainty in the dispersion
   #   estimation for each gene and gives more control over type I error rate.
 
-  fit <- glmQLFit(data_normalized)
-  plotQLDisp(fit)
+  fit <- edgeR::glmQLFit(data_normalized)
+  edgeR::plotQLDisp(fit)
 
   # Heat map
   # The documentation suggest to use "moderated log-counts-per-million"
   # for drawing a head map of individual RNA-seq samples
 
-
   # get cpm log2 values using the fitted coefficients
-  log_cpm <- cpm(fit, log = TRUE)
+  log_cpm <- edgeR::cpm(fit, log = TRUE)
   heatmap(log_cpm, scale = "row", col = heat.colors(256))
 
   fit
 }
 ############ 6. differential expression tests
+
+test_models <- function(fit, contrasts) {
+  for (current_contrast in colnames(my_contrasts)) {
+    # Performs genewise quasi F-tests for the given contrast
+    qlf_test <- edgeR::glmQLFTest(
+      fit,
+      contrast = my_contrasts[, current_contrast]
+    )
+
+    # Table rows =min(n, number of genes with
+    #   adjusted p-value <= cutoff p-value), the default p-value cutoff is 1!!!
+    top <- edgeR::topTags(qlf_test, n = Inf)
+    # Save top table
+    top_table_file_name <- sprintf(
+      "%s/top_DE_%s.tsv",
+      output_directory_path,
+      current_contrast
+    )
+    write.table(
+      top,
+      file = top_table_file_name, append = FALSE, quote = FALSE, sep = "\t"
+    )
+
+    # Make a mean-difference plot with smearing of points with very low counts
+    # Highlight top FDR < 0.05
+    fdr_threshold <- 0.05
+    top_selected_ids <- top$table[top$table$FDR < fdr_threshold, 1]
+
+    smear_file_name <- sprintf(
+      "%s/smear_MDplot_%s.pdf",
+      output_directory_path,
+      current_contrast
+    )
+    pdf(file = smear_file_name)
+    edgeR::plotSmear(qlf_test, de.tags = top_selected_ids)
+    abline(
+      h = c(-0.5, 0, 0.5), col = c("#0459ad", "red", "#0459ad"), lty = 2
+    )
+    dev.off()
+  }
+}