Skip to content

Commit 92961b1

Browse files
committed
using lumi package
how to get the expression profile matrix and what's the correct group information
1 parent af2bbf5 commit 92961b1

File tree

1 file changed

+105
-0
lines changed

1 file changed

+105
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
## Load the raw Illumina probe profile for GSE30669 with lumi, run the default
## preprocessing, and build the filtered expression matrix used further down.
##
## NOTE(review): rm(list = ls()) and setwd() tie this script to one session and
## one machine. They are kept because the relative paths used below (and the
## files written later in this script) resolve against this working directory.
rm(list = ls())
library(lumi)

studyID <- "GSE30669"
setwd("G:/array/GSE30669")

## Source of the input file (compressed):
# ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE30nnn/GSE30669/suppl/GSE30669_HEK_Sample_Probe_Profile.txt.gz
fileName <- "GSE30669_HEK_Sample_Probe_Profile.txt"
if (!file.exists(fileName)) {
  # Fail early with a readable message instead of an error from inside lumi.
  stop("Input file not found in ", getwd(), ": ", fileName, call. = FALSE)
}

## No sampleInfoFile is passed: a sampleInfo.txt could not be found for this
## series, so the phenotype data printed below is whatever lumi derives itself.
x.lumi <- lumiR.batch(fileName) ## , sampleInfoFile = "sampleInfo.txt")
pData(phenoData(x.lumi))

## Do all the default preprocessing in one step
lumi.N.Q <- lumiExpresso(x.lumi)

## Retrieve the normalized data
dataMatrix <- exprs(lumi.N.Q)

## To speed up the processing and reduce false positives, remove the
## unexpressed and un-annotated genes.
presentCount <- detectionCall(x.lumi)
## drop = FALSE keeps a matrix even when exactly one probe passes the filter;
## without it the result would collapse to a plain vector.
selectDataMatrix <- dataMatrix[presentCount > 0, , drop = FALSE]

## Probe annotation table, used later to attach gene symbols to the results.
probe2target <- pData(featureData(x.lumi))

## Estimate the detection call (percentage of expressed genes) of each sample
temp <- detectionCall(x.lumi, type = "sample")
print(temp)

## Estimate the present count of each gene (probe)
temp <- detectionCall(x.lumi, type = "probe")
hist(temp)
25+
26+
## QC, sampleRelation.png is especially important.
27+
## Write the lumi QC plots of `example.lumi` as PNG files into the current
## working directory and print its QC summary.
##
## Arguments:
##   example.lumi  object accepted by lumi's summary(), plot() and plotCDF()
##                 (called with the LumiBatch `x.lumi` below).
##   width, height base PNG size in pixels; the two pairwise plots are written
##                 at twice this size. Defaults reproduce the former fixed
##                 800 x 800 output.
##
## Returns (invisibly) the names of the PNG files, in the order written.
QClumi <- function(example.lumi, width = 800, height = 800) {
  ## Open a PNG device, run `draw`, and close the device again even when the
  ## plotting call fails, so a failed plot cannot leave a device open.
  savePng <- function(file, draw, scale = 1) {
    png(file, width = width * scale, height = height * scale)
    on.exit(dev.off(), add = TRUE)
    draw()
    invisible(file)
  }

  summary(example.lumi, "QC")

  written <- c(
    savePng("1_density.png", function() {
      plot(example.lumi, what = "density") ## plot the density
    }),
    savePng("2_cdf.png", function() {
      plotCDF(example.lumi)
    }),
    savePng("3_pairwise.png", scale = 2, draw = function() {
      plot(example.lumi, what = "pair") ## pairwise plots
      # plot(example.lumi, what = "pair", smoothScatter = TRUE)
    }),
    savePng("4_pairwiseMAplot.png", scale = 2, draw = function() {
      plot(example.lumi, what = "MAplot")
      # plot(example.lumi, what = "MAplot", smoothScatter = TRUE)
    }),
    ## File name kept exactly as before (including its spelling) so existing
    ## outputs and anything reading them keep matching.
    savePng("5_density_plot_of_coefficient_of_varience.png", function() {
      plot(example.lumi, what = "cv")
    }),
    savePng("6_sampleRelation.png", function() {
      plot(example.lumi, what = "sampleRelation")
    }),
    savePng("7_MDS.png", function() {
      plot(example.lumi, what = "sampleRelation", method = "mds")
    })
  )

  invisible(written)
}

## Run the QC on the raw (not yet normalized) data.
QClumi(x.lumi)
55+
56+
57+
# library(GEOquery)
58+
# library(limma)
59+
# GSE30669 <- getGEO('GSE30669', destdir='.',getGPL = F)
60+
# exprSet=exprs(GSE30669[[1]])
61+
# GSE30669[[1]]
62+
# pdata=pData(GSE30669[[1]])
63+
# exprSet=exprs(GSE30669[[1]])
64+
65+
66+
###################################################
67+
### code chunk number 31: Identify differentially expressed genes
68+
###################################################
69+
## Specify the sample type
70+
# It's wrong to just arrange them in order.
71+
# sampleType <- factor(c(rep('PMN',3),rep('PDK1',3),rep('MYC',3),rep('E545K',3)))
72+
# limmaArg='PDK1-PMN,MYC-PMN,E545K-PMN'
73+
## Group label of every array, in the column order of selectDataMatrix.
## The sample relation plot (6_sampleRelation.png) should be checked to group
## the arrays properly; until the real group names are known, anonymous labels
## G1..G4 are used.
sampleType <- factor(c(
  "G1", "G4", "G3", "G2",
  "G1", "G4", "G3", "G2",
  "G1", "G2", "G3", "G4"
))
## Comma-separated list of pairwise contrasts to test.
limmaArg <- "G2-G1,G3-G1,G4-G1,G4-G2,G3-G2,G4-G3"

## library() instead of `if (require(limma))`: a missing limma used to skip
## this whole section silently and only surfaced later as undefined objects.
library(limma)

if (length(sampleType) != ncol(selectDataMatrix)) {
  stop(
    "sampleType has ", length(sampleType), " labels but the expression ",
    "matrix has ", ncol(selectDataMatrix), " samples.",
    call. = FALSE
  )
}

## One coefficient per group (no intercept), named after the group labels so
## the contrast strings above can refer to them directly.
design <- model.matrix(~ 0 + sampleType)
colnames(design) <- levels(sampleType)
fit <- lmFit(selectDataMatrix, design)

contrastsCommand <- unlist(strsplit(limmaArg, split = ","))
cont.matrix <- makeContrasts(contrasts = contrastsCommand, levels = design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
options(digits = 4)

## Write one CSV per contrast: <studyID>.diffexp.<contrast>.csv
for (i in seq_along(contrastsCommand)) {
  tempOutFile <- paste0(studyID, ".diffexp.", contrastsCommand[i], ".csv")
  tempOutput <- topTable(fit2, coef = i, number = Inf)
  tempOutput <- na.omit(tempOutput)
  ## Change probe ID to gene symbol according to probe2target.
  ## NOTE(review): assumes column 1 of probe2target holds the probe IDs used
  ## as row names and column 2 the gene symbol -- confirm against the file.
  tempOutput$geneSymbol <-
    probe2target[match(rownames(tempOutput), probe2target[, 1]), 2]
  write.csv(tempOutput, tempOutFile, quote = FALSE, row.names = FALSE)
}
95+
96+
## Significant analysis of microarray identified 1,750, 1,080, and 297 differentially expressed genes
97+
## in these transformed cells when compared with nontransformed control cells, respectively
98+
## false discovery rate < 0.05; P < 0.01;
99+
100+
## Print, for every contrast, how many probes have an adjusted p-value
## (adj.P.Val) below 0.05.
if (!exists("fit2") || !exists("contrastsCommand")) {
  stop(
    "`fit2` / `contrastsCommand` are not defined; ",
    "run the limma differential expression step first.",
    call. = FALSE
  )
}
for (i in seq_along(contrastsCommand)) {
  tempOutput <- topTable(fit2, coef = i, number = Inf)
  ## sum(..., na.rm = TRUE) counts only real hits; indexing rows with a
  ## logical vector containing NA would add an all-NA row per missing value
  ## and inflate the count.
  print(sum(tempOutput$adj.P.Val < 0.05, na.rm = TRUE))
}
104+
105+

0 commit comments

Comments
 (0)