@@ -146,8 +146,10 @@ plot(
146
146
``` {r eda1}
147
147
library(GSE5859Subset)
148
148
data(GSE5859Subset) ##this loads three tables
149
- c(class(geneExpression), class(sampleInfo))
150
- rbind(dim(geneExpression), dim(sampleInfo))
149
+ class(geneExpression)
150
+ dim(geneExpression)
151
+ class(sampleInfo)
152
+ dim(sampleInfo)
151
153
head(sampleInfo)
152
154
```
153
155
@@ -165,7 +167,7 @@ Note that these 8,793 tests are done in about 0.01s
165
167
166
168
## Volcano plots: Example
167
169
168
- ``` {r vp2, fig.height=3 , fig.width=6 }
170
+ ``` {r vp2, echo = FALSE, fig.height=5 , fig.width=5 }
169
171
par(mar = c(4, 4, 0, 0))
170
172
plot(results$dm,
171
173
-log10(results$p.value),
@@ -217,7 +219,7 @@ hist(permresults$p.value)
217
219
## P-value histograms: Summary
218
220
219
221
- Give a quick look at how many significant p-values there may be
220
- - When using permuted labels, can exposes non-independence among the samples
222
+ - When using permuted labels, can expose non-independence among the samples
221
223
+ can be due to batch effects or family structure
222
224
- Most common approaches for correcting batch effects are:
223
225
+ ` ComBat ` : corrects for known batch effects by linear model, and
@@ -249,7 +251,7 @@ plot((geneExpression[, 1] + pseudo) / 2, (geneExpression[, 1] - pseudo))
249
251
## Heatmaps
250
252
251
253
* Detailed representation of a high-dimensional dataset.
252
- - ` ComplexHeatmap ` package is the best as of 2023 : large datasets, interactive heatmaps, simple defaults but many customizations possible
254
+ - ` ComplexHeatmap ` package is the best as of 2024 : large datasets, interactive heatmaps, simple defaults but many customizations possible
253
255
254
256
``` {r ma1, fig.width=12, echo=FALSE}
255
257
suppressPackageStartupMessages(library(ComplexHeatmap))
@@ -271,7 +273,7 @@ Heatmap(ge, use_raster = FALSE, top_annotation = column_ha, right_annotation = r
271
273
272
274
## Heatmaps: Summary
273
275
274
- - Clustering becomes slow and memory-intensivefor thousands of rows
276
+ - Clustering becomes slow and memory-intensive for thousands of rows
275
277
- probably too detailed for thousands of rows
276
278
- can show co-expressed genes, groups of samples
277
279
@@ -320,7 +322,7 @@ RColorBrewer::display.brewer.all(n = 7)
320
322
``` {r ge, message=FALSE}
321
323
library(Biobase)
322
324
library(genefilter)
323
- library(GSE5859) ## BiocInstaller::biocLite("genomicsclass/GSE5859")
325
+ library(GSE5859)
324
326
data(GSE5859)
325
327
geneExpression = exprs(e)
326
328
sampleInfo = pData(e)
@@ -347,6 +349,9 @@ table(year, sampleInfo$ethnicity)
347
349
348
350
``` {r ge4, cache=TRUE, warning=FALSE}
349
351
pc <- prcomp(t(geneExpression), scale. = TRUE)
352
+ ```
353
+
354
+ ``` {r, echo=FALSE, warning=FALSE}
350
355
boxplot(
351
356
pc$x[, 1] ~ month,
352
357
varwidth = TRUE,
@@ -419,7 +424,7 @@ table(hcclass, year)
419
424
420
425
## Exercises
421
426
422
- * OSCA Multi-sample [ Chapter 1: Correcting batch effects] ( http://bioconductor.org/books/3.17 /OSCA.multisample/integrating-datasets.html )
427
+ * OSCA Multi-sample [Chapter 1: Correcting batch effects](http://bioconductor.org/books/release/OSCA.multisample/integrating-datasets.html)
423
428
424
429
## Links
425
430
0 commit comments