-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy path5-de.R
568 lines (514 loc) · 22.1 KB
/
5-de.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
# Mo Huang, mohuang@wharton.upenn.edu
# DE analysis
X <- readRDS("SAVER-data/zeisel_ref.rds")
Y <- readRDS("SAVER-data/zeisel_samp.rds")
n.cells <- ncol(X)
n.genes <- nrow(X)
cell.names <- colnames(X)
gene.names <- rownames(X)
normalizeData <- function(x, y = x) {
sf <- colSums(y)/mean(colSums(y))
return(sweep(x, 2, sf, "/"))
}
X.norm <- normalizeData(X)
Y.norm <- normalizeData(Y)
header <- read.table("SAVER-data/expression_mRNA_17-Aug-2014.txt", sep="\t",
nrows=10, header=FALSE, fill = TRUE, comment.char = "",
stringsAsFactors = FALSE)
header.df <- data.frame(t(header[9:10, -c(1, 2)]), stringsAsFactors = FALSE)
colnames(header.df) <- c("class1", "class2")
rownames(header.df) <- header[8, -c(1, 2)]
class1.cells <- header.df[colnames(X), 1]
class2.cells <- header.df[colnames(X), 2]
cell.subtype <- header.df[colnames(X), 2]
ca1pyr1 <- which(cell.subtype == "CA1Pyr1")
ca1pyr2 <- which(cell.subtype == "CA1Pyr2")
###############################################################################
### SAVER DE (takes a while to run)
# X.saver <- readRDS("SAVER-data/zeisel_ref_saver.rds")
#
# Y.saver <- readRDS("SAVER-data/zeisel_samp_saver.rds")
#
# get.sig.genes <- function(x, alpha, beta) {
# cell.subtype <- header.df[colnames(x), 2]
# ind <- which(cell.subtype %in% c("CA1Pyr1", "CA1Pyr2"))
# group1 <- x[, which(cell.subtype == "CA1Pyr1")]
# group2 <- x[, which(cell.subtype == "CA1Pyr2")]
# w <- vector("list", 10)
# for (i in 1:10) {
# x.samp <- t(sapply(1:n.genes, function(i) rgamma(length(ind), alpha[i, ind],
# beta[i, ind])))
# w[[i]] <- apply(x.samp, 1,
# FUN = function(z)
# unlist(
# wilcox.test(z ~ cell.subtype[
# ind])[1]))
# print(i)
# }
# n.x <- ncol(group1)
# n.y <- ncol(group2)
# mean.w <- Reduce("+", w)/length(w) - n.x*n.y/2
# var.w <- n.x*n.y/12*(n.x+n.y+1) + (1+1/10)/9*apply(simplify2array(w), 1, var)
# mean.z <- sapply(1:n.genes, function(i) (mean.w[i]-sign(mean.w[i])*0.5)/sqrt(var.w[i]))
# mean.p <- sapply(mean.z, function(x) 2*min(pnorm(x), pnorm(x, lower.tail = FALSE)))
# pvaluesrk2 <- cbind(mean.w, mean.p)
# pvaluesrk.adj <- p.adjust(pvaluesrk2[, 2], method = "BH")
# group1.mean <- rowMeans(group1)
# group2.mean <- rowMeans(group2)
# fold.change <- log2(group1.mean/group2.mean)
# df <- data.frame(genes = rownames(x),
# statistic = pvaluesrk2[, 1],
# variance = var.w,
# pvalue.rk = pvaluesrk2[, 2],
# pvalue.adj = pvaluesrk.adj,
# log2.fold.chg = fold.change, stringsAsFactors = FALSE)
# return(df)
# }
#
# X.saver.de <- get.sig.genes(X.saver$estimate, X.saver$alpha, X.saver$beta)
# Y.saver.de <- get.sig.genes(Y.saver$estimate, Y.saver$alpha, Y.saver$beta)
#
# saveRDS(list(X.saver.de, Y.saver.de), "SAVER-data/de_saver.rds")
###############################################################################
### MAST
# de.cells <- which(class2.cells %in% c("CA1Pyr1", "CA1Pyr2"))
#
# X.sub <- X.norm[, de.cells]
# Y.sub <- Y.norm[, de.cells]
#
# cdat <- data.frame(class2 = class2.cells[de.cells])
#
# library(MAST)
# library(data.table)
#
# X.sca <- FromMatrix(log2(X.sub+1), cdat)
# cdr <- colSums(assay(X.sca) > 0)
# colData(X.sca)$cngeneson <- scale(cdr)
# zlm.X <- zlm.SingleCellAssay(~ class2 + cngeneson, X.sca)
# summary.X <- summary(zlm.X, doLRT='class2CA1Pyr2')
# summary.XDt <- summary.X$datatable
# fcHurdle.X <- merge(summary.XDt[contrast=='class2CA1Pyr2' & component=='H',
# .(primerid, `Pr(>Chisq)`)], #hurdle P values
# summary.XDt[contrast=='class2CA1Pyr2' & component=='logFC',
# .(primerid, coef, ci.hi, ci.lo)], by='primerid')
# fcHurdle.X[,fdr:=p.adjust(`Pr(>Chisq)`, 'fdr')]
# X.sig <- data.frame(fcHurdle.X)
#
# Y.sca <- FromMatrix(log2(Y.sub+1), cdat)
# cdr <- colSums(assay(Y.sca) > 0)
# colData(Y.sca)$cngeneson <- scale(cdr)
# zlm.Y <- zlm.SingleCellAssay(~ class2 + cngeneson, Y.sca)
# summary.Y <- summary(zlm.Y, doLRT='class2CA1Pyr2')
# summary.YDt <- summary.Y$datatable
# fcHurdle.Y <- merge(summary.YDt[contrast=='class2CA1Pyr2' & component=='H',
# .(primerid, `Pr(>Chisq)`)], #hurdle P values
# summary.YDt[contrast=='class2CA1Pyr2' & component=='logFC',
# .(primerid, coef, ci.hi, ci.lo)], by='primerid')
# fcHurdle.Y[,fdr:=p.adjust(`Pr(>Chisq)`, 'fdr')]
# Y.sig <- data.frame(fcHurdle.Y)
#
# saveRDS(list(X.sig, Y.sig), "SAVER-data/de_mast.rds")
###############################################################################
### scDD
# library(scDD)
# library(DESeq2)
# library(BiocParallel)
# library(Biobase)
#
# register(MulticoreParam(16))
#
# X <- readRDS("SAVER-data/zeisel_ref.rds")
#
# Y <- readRDS("SAVER-data/zeisel_samp.rds")
#
# header <- read.table("data/expression_mRNA_17-Aug-2014.txt", sep="\t",
# nrows=10, header=FALSE, fill = TRUE, comment.char = "",
# stringsAsFactors = FALSE)
#
# header.names <- header[, 2]
#
# header <- header[, -c(1,2)]
#
# cell.subtype <- as.character(header[10, which(header[8, ] %in% colnames(X))])
#
# # only selecting CA1Pyr1 and CA1Pyr2
# ind <- which(cell.subtype %in% c("CA1Pyr1", "CA1Pyr2"))
#
# normalizeData <- function(x, y = x) {
# sf <- colSums(y)/mean(colSums(y))
# return(sweep(x, 2, sf, "/"))
# }
#
# X.norm <- normalizeData(X)
# Y.norm <- normalizeData(Y)
#
# X1 <- X.norm[, ind]
# Y1 <- Y.norm[, ind]
#
# cell.subtype1 <- as.character(header[10, which(header[8, ] %in% colnames(X1))])
#
# columnData <- data.frame(group = cell.subtype1, row.names = colnames(X1))
#
# phenoData <- data.frame(condition = ifelse(columnData$group == "CA1Pyr1", 1, 2),
# row.names = colnames(X1))
# phenoData <- as(phenoData, "AnnotatedDataFrame")
#
# X.se <- ExpressionSet(assayData = as.matrix(X1),
# phenoData = phenoData)
#
# Y.se <- ExpressionSet(assayData = as.matrix(Y1),
# phenoData = phenoData)
#
# prior_param <- list(alpha=0.01, mu0=0, s0=0.01, a0=0.01, b0=0.01)
#
# X.sig <- scDD(X.se, prior_param=prior_param, n.cores = cores)
# Y.sig <- scDD(Y.se, prior_param=prior_param, n.cores = cores)
#
# saveRDS(list(X.sig, Y.sig), "SAVER-data/de_scdd.rds")
###############################################################################
### SCDE
# library(scde)
# library(methods)
#
# X.sub <- X[, c(ca1pyr1, ca1pyr2)]
# Y.sub <- Y[, c(ca1pyr1, ca1pyr2)]
#
# sg <- factor(header.df[colnames(X.sub), "class2"],
# levels = c("CA1Pyr1", "CA1Pyr2"))
# names(sg) <- colnames(X.sub)
#
# X.err <- scde.error.models(counts = X.sub, groups = sg, n.cores = 1,
# threshold.segmentation = TRUE,
# save.crossfit.plots = FALSE,
# save.model.plots = FALSE,
# verbose = 1)
#
# Y.err <- scde.error.models(counts = Y.sub, groups = sg, n.cores = 1,
# threshold.segmentation = TRUE,
# save.crossfit.plots = FALSE,
# save.model.plots = FALSE,
# verbose = 1)
#
# valid.cells.X <- X.err$corr.a > 0
# valid.cells.Y <- Y.err$corr.a > 0
#
# X.err <- X.err[valid.cells.X, ]
# Y.err <- Y.err[valid.cells.Y, ]
#
# X.prior <- scde.expression.prior(models = X.err, counts = X.sub,
# length.out = 400, show.plot = FALSE)
#
# Y.prior <- scde.expression.prior(models = Y.err, counts = Y.sub,
# length.out = 400, show.plot = FALSE)
#
#
# X.diff <- scde.expression.difference(X.err, X.sub, X.prior, groups = sg,
# n.randomizations = 100, n.cores = 1,
# verbose = 1)
#
# Y.diff <- scde.expression.difference(Y.err, Y.sub, Y.prior, groups = sg,
# n.randomizations = 100, n.cores = 1,
# verbose = 1)
#
#
# X.diff$p.value <- 2*pnorm(abs(X.diff$cZ), lower.tail = FALSE)
# Y.diff$p.value <- 2*pnorm(abs(Y.diff$cZ), lower.tail = FALSE)
#
# saveRDS(list(X.diff, Y.diff), "SAVER-data/de_scde.rds")
###############################################################################
### FDR estimation
# library(doParallel)
#
# cl <- makeCluster(20)
# registerDoParallel(cl)
#
# X <- readRDS("SAVER-data/zeisel_ref.rds")
#
# Y <- readRDS("SAVER-data/zeisel_samp.rds")
#
# n.cells <- ncol(X)
# n.genes <- nrow(X)
#
# cell.names <- colnames(X)
# gene.names <- rownames(X)
#
# normalizeData <- function(x, y = x) {
# sf <- colSums(y)/mean(colSums(y))
# return(sweep(x, 2, sf, "/"))
# }
#
# X.norm <- normalizeData(X)
# Y.norm <- normalizeData(Y)
#
#
# header <- read.table("data/expression_mRNA_17-Aug-2014.txt", sep="\t",
# nrows=10, header=FALSE, fill = TRUE, comment.char = "",
# stringsAsFactors = FALSE)
#
# header.df <- data.frame(t(header[9:10, -c(1, 2)]), stringsAsFactors = FALSE)
# colnames(header.df) <- c("class1", "class2")
# rownames(header.df) <- header[8, -c(1, 2)]
#
# class1.cells <- header.df[colnames(X), 1]
# class2.cells <- header.df[colnames(X), 2]
#
# cell.subtype <- header.df[colnames(X), 2]
# ca1pyr1 <- which(cell.subtype == "CA1Pyr1")
# ca1pyr2 <- which(cell.subtype == "CA1Pyr2")
#
# de.cells <- which(class2.cells %in% c("CA1Pyr1", "CA1Pyr2"))
#
# X.sub <- X[, de.cells]
# X.sub.norm <- X.norm[, de.cells]
#
# Y.sub <- Y[, de.cells]
# Y.sub.norm <- Y.norm[, de.cells]
#
# package.list <- c("MAST", "data.table", "scde", "methods", "scDD", "DESeq2",
# "Biobase")
# X.fp <- foreach(i = 1:20, .packages = package.list,
# .errorhandling = 'pass') %dopar% {
# set.seed(i)
# de.cells.perm <- sample(de.cells, length(de.cells))
# x.samp <- t(sapply(1:n.genes, function(k) rgamma(length(de.cells),
# X.saver$alpha[k, de.cells],
# X.saver$beta[k, de.cells])))
# w <- apply(x.samp, 1,
# FUN = function(z)
# unlist(
# wilcox.test(z ~ cell.subtype[
# de.cells.perm])[1]))
# n.x <- ncol(group1)
# n.y <- ncol(group2)
# mean.w <- w-n.x*n.y/2
# var.w <- rep(n.x*n.y/12*(n.x+n.y+1), n.genes)
# mean.z <- sapply(1:n.genes, function(i) (mean.w[i]-sign(mean.w[i])*0.5)/sqrt(var.w[i]))
# mean.p <- sapply(mean.z, function(x) 2*min(pnorm(x), pnorm(x, lower.tail = FALSE)))
# pvaluesrk2 <- cbind(mean.w, mean.p)
# pvaluesrk.adj <- p.adjust(pvaluesrk2[, 2], method = "BH")
# group1.mean <- rowMeans(group1)
# group2.mean <- rowMeans(group2)
# fold.change <- log2(group1.mean/group2.mean)
# X.sig.saver <- data.frame(genes = rownames(x),
# statistic = pvaluesrk2[, 1],
# variance = var.w,
# pvalue.rk = pvaluesrk2[, 2],
# pvalue.adj = pvaluesrk.adj,
# log2.fold.chg = fold.change, stringsAsFactors = FALSE)
#
# cdat <- data.frame(class2 = class2.cells[de.cells.perm])
# Y.sca <- FromMatrix(log2(X.sub.norm+1), cdat)
# cdr <- colSums(assay(Y.sca) > 0)
# colData(Y.sca)$cngeneson <- scale(cdr)
# zlm.Y <- zlm.SingleCellAssay(~ class2 + cngeneson, Y.sca)
# summary.Y <- summary(zlm.Y, doLRT='class2CA1Pyr2')
# summary.YDt <- summary.Y$datatable
# fcHurdle.Y <- merge(summary.YDt[contrast=='class2CA1Pyr2' & component=='H',
# .(primerid, `Pr(>Chisq)`)], #hurdle P values
# summary.YDt[contrast=='class2CA1Pyr2' & component=='logFC',
# .(primerid, coef, ci.hi, ci.lo)], by='primerid')
# fcHurdle.Y[,fdr:=p.adjust(`Pr(>Chisq)`, 'fdr')]
# X.sig.mast <- data.frame(fcHurdle.Y)
#
# cell.subtype1 <- class2.cells[de.cells.perm]
# columnData <- data.frame(group = cell.subtype1,
# row.names = colnames(X.sub.norm))
# phenoData <- data.frame(condition = ifelse(columnData$group == "CA1Pyr1", 1, 2),
# row.names = colnames(X.sub.norm))
# phenoData <- as(phenoData, "AnnotatedDataFrame")
# X.se <- ExpressionSet(assayData = as.matrix(X.sub.norm),
# phenoData = phenoData)
# prior_param <- list(alpha=0.01, mu0=0, s0=0.01, a0=0.01, b0=0.01)
# X.sig.scdd <- scDD(X.se, prior_param=prior_param, n.cores = 1)
#
# sg <- factor(class2.cells[de.cells.perm],
# levels = c("CA1Pyr1", "CA1Pyr2"))
# names(sg) <- colnames(X.sub)
# X.err <- scde.error.models(counts = X.sub, groups = sg, n.cores = 1,
# threshold.segmentation = TRUE,
# save.crossfit.plots = FALSE,
# save.model.plots = FALSE)
# valid.cells.X <- X.err$corr.a > 0
# X.err <- X.err[valid.cells.X, ]
# X.prior <- scde.expression.prior(models = X.err, counts = X.sub,
# length.out = 400, show.plot = FALSE)
# X.sig.scde <- scde.expression.difference(X.err, X.sub, X.prior, groups = sg,
# n.randomizations = 100, n.cores = 1,
# verbose = 1)
# X.sig.scde$p.value <- 2*pnorm(abs(X.sig.scde$cZ), lower.tail = FALSE)
# return(list(saver = X.sig.saver, mast = X.sig.mast,
# scdd = X.sig.scdd,
# scde = X.sig.scde))
# }
#
# Y.fp <- foreach(i = 1:20, .packages = package.list,
# .errorhandling = 'pass') %dopar% {
# set.seed(i)
# de.cells.perm <- sample(de.cells, length(de.cells))
# x.samp <- t(sapply(1:n.genes, function(k) rgamma(length(de.cells),
# Y.saver$alpha[k, de.cells],
# Y.saver$beta[k, de.cells])))
# w <- apply(x.samp, 1,
# FUN = function(z)
# unlist(
# wilcox.test(z ~ cell.subtype[
# de.cells.perm])[1]))
# n.x <- ncol(group1)
# n.y <- ncol(group2)
# mean.w <- w-n.x*n.y/2
# var.w <- rep(n.x*n.y/12*(n.x+n.y+1), n.genes)
# mean.z <- sapply(1:n.genes, function(i) (mean.w[i]-sign(mean.w[i])*0.5)/sqrt(var.w[i]))
# mean.p <- sapply(mean.z, function(x) 2*min(pnorm(x), pnorm(x, lower.tail = FALSE)))
# pvaluesrk2 <- cbind(mean.w, mean.p)
# pvaluesrk.adj <- p.adjust(pvaluesrk2[, 2], method = "BH")
# group1.mean <- rowMeans(group1)
# group2.mean <- rowMeans(group2)
# fold.change <- log2(group1.mean/group2.mean)
# Y.sig.saver <- data.frame(genes = rownames(x),
# statistic = pvaluesrk2[, 1],
# variance = var.w,
# pvalue.rk = pvaluesrk2[, 2],
# pvalue.adj = pvaluesrk.adj,
# log2.fold.chg = fold.change, stringsAsFactors = FALSE)
#
# cdat <- data.frame(class2 = class2.cells[de.cells.perm])
# Y.sca <- FromMatrix(log2(Y.sub.norm+1), cdat)
# cdr <- colSums(assay(Y.sca) > 0)
# colData(Y.sca)$cngeneson <- scale(cdr)
# zlm.Y <- zlm.SingleCellAssay(~ class2 + cngeneson, Y.sca)
# summary.Y <- summary(zlm.Y, doLRT='class2CA1Pyr2')
# summary.YDt <- summary.Y$datatable
# fcHurdle.Y <- merge(summary.YDt[contrast=='class2CA1Pyr2' & component=='H',
# .(primerid, `Pr(>Chisq)`)], #hurdle P values
# summary.YDt[contrast=='class2CA1Pyr2' & component=='logFC',
# .(primerid, coef, ci.hi, ci.lo)], by='primerid')
# fcHurdle.Y[,fdr:=p.adjust(`Pr(>Chisq)`, 'fdr')]
# Y.sig.mast <- data.frame(fcHurdle.Y)
#
# cell.subtype1 <- class2.cells[de.cells.perm]
# columnData <- data.frame(group = cell.subtype1,
# row.names = colnames(Y.sub.norm))
# phenoData <- data.frame(condition = ifelse(columnData$group == "CA1Pyr1", 1, 2),
# row.names = colnames(Y.sub.norm))
# phenoData <- as(phenoData, "AnnotatedDataFrame")
# Y.se <- ExpressionSet(assayData = as.matrix(Y.sub.norm),
# phenoData = phenoData)
# prior_param <- list(alpha=0.01, mu0=0, s0=0.01, a0=0.01, b0=0.01)
# Y.sig.scdd <- scDD(Y.se, prior_param=prior_param, n.cores = 1)
#
# sg <- factor(class2.cells[de.cells.perm],
# levels = c("CA1Pyr1", "CA1Pyr2"))
# names(sg) <- colnames(Y.sub)
# Y.err <- scde.error.models(counts = Y.sub, groups = sg, n.cores = 1,
# threshold.segmentation = TRUE,
# save.crossfit.plots = FALSE,
# save.model.plots = FALSE)
# valid.cells.Y <- Y.err$corr.a > 0
# Y.err <- Y.err[valid.cells.Y, ]
# Y.prior <- scde.expression.prior(models = Y.err, counts = Y.sub,
# length.out = 400, show.plot = FALSE)
# Y.sig.scde <- scde.expression.difference(Y.err, Y.sub, Y.prior, groups = sg,
# n.randomizations = 100, n.cores = 1,
# verbose = 1)
# Y.sig.scde$p.value <- 2*pnorm(abs(Y.sig.scde$cZ), lower.tail = FALSE)
# return(list(saver = Y.sig.saver, mast = Y.sig.mast,
# scdd = Y.sig.scdd,
# scde = Y.sig.scde))
# }
#
# saveRDS(list(X.fp, Y.fp), "SAVER-data/de_fdr.rds")
###############################################################################
### DE analysis
de.saver <- readRDS("SAVER-data/de_saver.rds")
de.mast <- readRDS("SAVER-data/de_mast.rds")
de.scdd <- readRDS("SAVER-data/de_scdd.rds")
de.scde <- readRDS("SAVER-data/de_scde.rds")
perm <- readRDS("SAVER-data/de_fdr.rds")
X.sig <- c(sum(de.saver[[1]]$pvalue.adj < 0.01),
sum(de.mast[[1]]$fdr < 0.01),
sum(de.scde[[1]]$p.values < 0.01),
sum(de.scdd[[1]]$Genes$nonzero.pvalue.adj < 0.01, na.rm = TRUE),
sum(de.scdd[[1]]$Genes$zero.pvalue.adj < 0.01, na.rm = TRUE))
names(X.sig) <- c("SAVER", "MAST", "SCDE", "SCDD1", "SCDD2")
Y.sig <- c(sum(de.saver[[2]]$pvalue.adj < 0.01),
sum(de.mast[[2]]$fdr < 0.01),
sum(de.scde[[2]]$p.value < 0.01),
sum(de.scdd[[2]]$Genes$nonzero.pvalue.adj < 0.01, na.rm = TRUE),
sum(de.scdd[[2]]$Genes$zero.pvalue.adj < 0.01, na.rm = TRUE))
names(Y.sig) <- c("SAVER", "MAST", "SCDE", "SCDD1", "SCDD2")
total.sig <- rbind(X.sig, Y.sig)
scdd.n2 <- c(sum(!is.na(de.scdd[[1]]$Genes$zero.pvalue)),
sum(!is.na(de.scdd[[2]]$Genes$zero.pvalue)))
pvalue.cut <- 0.01/3529*total.sig
pvalue.cut[, 5] <- pvalue.cut[, 5]*3529/scdd.n2
# remove bad runs in down-sampled
bad.run <- which(sapply(perm[[2]], function(x) is.null(x[[2]])))
perm[[2]] <- perm[[2]][-bad.run]
fp <- matrix(0, 2, 5)
for (i in 1:2) {
fp[i, 1] <- mean(sapply(perm[[i]], function(x) sum(x[[1]]$pvalue.rk < pvalue.cut[i, 1])))
fp[i, 2] <- mean(sapply(perm[[i]], function(x) sum(x[[2]]$Pr..Chisq. < pvalue.cut[i, 2])))
fp[i, 3] <- mean(sapply(perm[[i]], function(x) sum(abs(x[[4]]$Z) > qnorm(pvalue.cut[i, 3]/2,
lower.tail = FALSE))))
fp[i, 4] <- mean(sapply(perm[[i]], function(x) sum(x[[3]]$Genes$nonzero.pvalue < pvalue.cut[i, 4],
na.rm = TRUE)))
fp[i, 5] <- mean(sapply(perm[[i]], function(x) sum(x[[3]]$Genes$zero.pvalue < pvalue.cut[i, 5],
na.rm = TRUE)))
}
fdr <- fp/total.sig
fdr[, 4] <- (fdr[, 4]*total.sig[, 4] + fdr[, 5]*total.sig[, 5])/(total.sig[, 4] + total.sig[, 5])
total.sig[, 4] <- total.sig[, 4] + total.sig[, 5]
pdf("plots/fig2c_de.pdf", 6.5, 3.25)
par(mfrow = c(1, 2), cex.main = 1.5, mar = c(2, 3, 0, 1.5) + 0.1, oma = c(2, 2, 2, 0),
mgp = c(3.5, 1, 0),
cex.lab = 1.5, font.lab = 2, cex.axis = 1.3, bty = "n", las = 1)
plot(0, type = "n", ylab = " ", xlab = " ", cex = 1.5,
ylim = c(0, 3500), xlim = c(1, 7.5), lwd = 2, pch = 5, axes = FALSE, main = " ")
# axis(1, at = c(1.85, 3.35, 4.85, 6.35, 7.85, 9.35), labels = c("Obs", "SAVER", "Reg",
# "KNN", "SVD", "RF"))
axis(1, at = c(3, 6.5), labels = FALSE, pos = 0, lwd.ticks = 0)
axis(1, at = c(1.1, 7.5), labels = FALSE, lwd.ticks = 0, pos = 0)
text(c(2.75, 5.75), par()$usr[3]-100,
labels = c("Reference", "Observed"), xpd = TRUE, cex = 1.2)
par(las = 1)
axis(2, pos = 1.1)
par(las = 0)
mtext("Significant genes", side = 2, line = 3.5, cex = 1.3)
xloc <- seq(1.75, 3.75, 0.5)
fill <- c("#a50f15", "#3182bd", "#74c476", "#cbc9e2")
for (i in 1:2) {
for (j in 1:4) {
j. <- c(1, 2, 4, 3)[j]
rect(xloc[j]+3*(i-1), 0, xloc[j+1]+3*(i-1), total.sig[i, j.],
col = fill[j], lwd = 2)
}
}
plot(0, type = "n", ylab = " ", xlab = " ", cex = 1.5,
ylim = c(0, 0.03), xlim = c(1, 7.5), lwd = 2, pch = 5, axes = FALSE, main = " ")
# axis(1, at = c(1.85, 3.35, 4.85, 6.35, 7.85, 9.35), labels = c("Obs", "SAVER", "Reg",
# "KNN", "SVD", "RF"))
axis(1, at = c(2, 6), labels = FALSE, pos = 0, lwd.ticks = 0)
axis(1, at = c(1.1, 7.5), labels = FALSE, lwd.ticks = 0, pos = 0)
text(c(2.75, 5.75), par()$usr[3]-0.001,
labels = c("Reference", "Observed"), xpd = TRUE, cex = 1.2)
par(las = 1)
axis(2, pos = 1.1)
par(las = 0)
mtext("Estimated FDR", side = 2, line = 4, cex = 1.3)
xloc <- seq(1.75, 3.75, 0.5)
fill <- c("#a50f15", "#3182bd", "#74c476", "#cbc9e2")
segments(1.1, 0.01, 7.75, 0.01, lwd = 2, col = "darkgray", lty = 2)
for (i in 1:2) {
for (j in 1:4) {
j. <- c(1, 2, 4, 3)[j]
rect(xloc[j]+3*(i-1), 0, xloc[j+1]+3*(i-1), fdr[i, j.],
col = fill[j], lwd = 2)
}
}
par(mfrow = c(1, 1))
plot(0, type = "n", xlab = "", ylab = "", axes = FALSE)
legend("center", c("SAVER", "MAST", "scDD", "SCDE"), pch = 22,
pt.bg = fill, cex = 1.2, ncol = 3, text.font = 1,
pt.cex = 2, pt.lwd = 2, xjust = 0.5, yjust = 0,
xpd = TRUE, bty = "n")
dev.off()