1
# Read and pre-process the Illumina expression data of GEO series GSE30669
# with the lumi package.
# NOTE: the former `rm(list = ls())` was removed; clearing the caller's
# workspace is a side effect this script does not need.
library(lumi)

studyID <- "GSE30669"
# All input and output files are resolved relative to this directory.
setwd("G:/array/GSE30669")

# Probe-level profile, downloaded and unpacked from:
# ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE30nnn/GSE30669/suppl/GSE30669_HEK_Sample_Probe_Profile.txt.gz
fileName <- "GSE30669_HEK_Sample_Probe_Profile.txt"
# Fail early with a clear message instead of deep inside the reader.
stopifnot(file.exists(fileName))

# sampleInfoFile is not supplied because no sampleInfo.txt could be found
# for this series.
x.lumi <- lumiR.batch(fileName)
pData(phenoData(x.lumi))

# Do all the default preprocessing in one step.
lumi.N.Q <- lumiExpresso(x.lumi)
# Retrieve the normalized data.
dataMatrix <- exprs(lumi.N.Q)

# To speed up the processing and reduce false positives, remove the
# unexpressed genes (rows whose detection call count is zero).
presentCount <- detectionCall(x.lumi)
# drop = FALSE keeps a matrix even if only a single row survives the filter.
selectDataMatrix <- dataMatrix[presentCount > 0, , drop = FALSE]
# Feature annotation table, used later to map probe IDs to gene symbols.
probe2target <- pData(featureData(x.lumi))

# Estimate the detection call (percentage of expressed genes) of each sample.
temp <- detectionCall(x.lumi, type = "sample")
print(temp)
# Estimate the present count of each gene (probe).
temp <- detectionCall(x.lumi, type = "probe")
hist(temp)
25
+
26
+ # # QC, sampleRelation.png is especially important.
27
# Write the lumi quality-control summary and QC plots for one data set.
#
# Seven PNG files (1_density.png ... 7_MDS.png) are written to the current
# working directory; 6_sampleRelation.png is the one to inspect when deciding
# how samples group together.
#
# Args:
#   example.lumi: the object handed to summary(), plot() and plotCDF().
#
# Returns:
#   NULL, invisibly. The function is called for its side effects.
QClumi <- function(example.lumi) {
  width <- 800
  height <- 800

  # Open a PNG device, draw into it, and always close it again -- even when
  # the plotting call fails -- so that no graphics device is leaked.
  # `plotExpr` is evaluated lazily, i.e. only after the device is open.
  writePng <- function(file, plotExpr, scale = 1) {
    png(file, width = width * scale, height = height * scale)
    on.exit(dev.off(), add = TRUE)
    force(plotExpr)
    invisible(file)
  }

  summary(example.lumi, "QC")

  writePng("1_density.png", plot(example.lumi, what = "density"))
  writePng("2_cdf.png", plotCDF(example.lumi))
  # The pairwise plots are drawn at twice the size of the others.
  writePng("3_pairwise.png", plot(example.lumi, what = "pair"), scale = 2)
  writePng(
    "4_pairwiseMAplot.png",
    plot(example.lumi, what = "MAplot"),
    scale = 2
  )
  writePng(
    "5_density_plot_of_coefficient_of_varience.png",
    plot(example.lumi, what = "cv")
  )
  writePng("6_sampleRelation.png", plot(example.lumi, what = "sampleRelation"))
  writePng(
    "7_MDS.png",
    plot(example.lumi, what = "sampleRelation", method = "mds")
  )

  invisible(NULL)
}
54
# Produce the QC summary and plots for the data as read in (before
# lumiExpresso normalization).
QClumi(example.lumi = x.lumi)
55
+
56
+
57
+ # library(GEOquery)
58
+ # library(limma)
59
+ # GSE30669 <- getGEO('GSE30669', destdir='.',getGPL = F)
60
+ # exprSet=exprs(GSE30669[[1]])
61
+ # GSE30669[[1]]
62
+ # pdata=pData(GSE30669[[1]])
63
+ # exprSet=exprs(GSE30669[[1]])
64
+
65
+
66
+ # ##################################################
67
+ # ## code chunk number 31: Identify differentially expressed genes
68
+ # ##################################################
69
+ # # Specify the sample type
70
+ # It's wrong to just arrange them in order.
71
+ # sampleType <- factor(c(rep('PMN',3),rep('PDK1',3),rep('MYC',3),rep('E545K',3)))
72
+ # limmaArg='PDK1-PMN,MYC-PMN,E545K-PMN'
73
# Group label of each array, in the column order of selectDataMatrix.
# The grouping should be checked against 6_sampleRelation.png; anonymous
# labels (G1..G4) are used here instead of real sample annotation.
sampleType <- factor(c(
  "G1", "G4", "G3", "G2",
  "G1", "G4", "G3", "G2",
  "G1", "G2", "G3", "G4"
))
# Comma-separated list of the group contrasts to test.
limmaArg <- "G2-G1,G3-G1,G4-G1,G4-G2,G3-G2,G4-G3"

# library() stops with an error when limma is missing, rather than silently
# skipping the analysis and leaving later code without fit2.
library(limma)
# One label per array is required to build a valid design matrix.
stopifnot(length(sampleType) == ncol(selectDataMatrix))

design <- model.matrix(~ 0 + sampleType)
colnames(design) <- levels(sampleType)
fit <- lmFit(selectDataMatrix, design)

contrastsCommand <- unlist(strsplit(limmaArg, split = ",", fixed = TRUE))
cont.matrix <- makeContrasts(contrasts = contrastsCommand, levels = design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
options(digits = 4)

# Write one result table per contrast: <studyID>.diffexp.<contrast>.csv
for (i in seq_along(contrastsCommand)) {
  tempOutFile <- paste0(studyID, ".diffexp.", contrastsCommand[i], ".csv")
  tempOutput <- topTable(fit2, coef = i, number = Inf)
  tempOutput <- na.omit(tempOutput)
  # Map probe IDs to gene symbols via probe2target.
  # NOTE(review): assumes column 1 of probe2target holds the probe IDs used as
  # row names and column 2 holds the gene symbol -- confirm against the data.
  symbolRow <- match(rownames(tempOutput), probe2target[, 1])
  tempOutput$geneSymbol <- probe2target[symbolRow, 2]
  # NOTE(review): row names (the probe IDs) are not written to the file.
  write.csv(tempOutput, tempOutFile, quote = FALSE, row.names = FALSE)
}
95
+
96
+ # # Significance analysis of microarrays (SAM) identified 1,750, 1,080, and 297 differentially expressed genes
97
+ # # in these transformed cells when compared with nontransformed control cells, respectively
98
+ # # false discovery rate < 0.05; P < 0.01;
99
+
100
# Print, for each contrast, the number of probes with adjusted P < 0.05.
for (i in seq_along(contrastsCommand)) {
  tempOutput <- topTable(fit2, coef = i, number = Inf)
  # sum(..., na.rm = TRUE) ignores probes whose adjusted P-value is NA;
  # subsetting the data frame with an NA-containing logical would have
  # counted them as hits.
  print(sum(tempOutput$adj.P.Val < 0.05, na.rm = TRUE))
}
104
+
105
+
0 commit comments