-
Notifications
You must be signed in to change notification settings - Fork 10
/
T_test_&_U_test.R
276 lines (210 loc) · 13 KB
/
T_test_&_U_test.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
"
NOTE: First Column is treated as 1 in the Selection of Data:
1- Please make sure your csv file contains only numeric variables with headers for the code and one
first column with Name of the Elements (for sample check the dataset provided with the
name 'practice file').
Column(Instance) 1 Column(Instance) 2 . . . . Column(Instance) n
Row(Variable) 1 (Value) (Value) . . . . (Value)
Row(Variable) 2 (Value) (Value) . . . . (Value)
. . . .
. . . .
. . . .
. . . .
Row(Variable) n (Value) (Value) . . . . (Value)
2- To run the code, select the whole code and run as source (top right in this window) & enter parameters
which will be asked on running the code in the CONSOLE screen. In this case select:
a- dataset to work on (after screen pops out)
b- Type of separator used in the file
c- Type of test to be performed 'U-test' or 'T-test'
b- Ranges of both groups (Group-1 and Group-2)
c- Parameter(paired) value for the test, either TRUE or FALSE
(Here TRUE indicates that groups are PAIRED, and FALSE indicates that groups are UN-PAIRED)
3- After providing all the parameters, the code will compute following:
* p-Value for the est
a- alternative: the alternative hypothesis. Allowed value is one of “two.sided” (default), “greater” or “less”.
b- conf.level: the default value of the Confidence-level is 0.95
* Bonferoni Correction for p-value
* Benjamin Hochberger Correction for p-value
* Averages of group-1 and group-2
* p & w values for Shapiro Normality test
4- After all the executions the final resulting file(in CSV format) will be saved on your current
working directory. In the resulting file you can find all the calulated values done through the
analysis.
"
#------------------------------------------------
"REQUIRED PACKAGES FOR T-TEST"
#------------------------------------------------
# Cleaning the workspace to start over
cat("\f") # Clear old outputs
rm(list=ls()) # Clear all variables
# Installing Packages
# Package for package administration:
if(!require("tidyr")) install.packages("tidyr") #package for Data-wrangling
if(!require("ggplot2")) install.packages("ggplot2") #package for Data-visualization
if(!require("ggpubr")) install.packages("ggpubr") #package for Data-visualization
if(!require("dplyr")) install.packages("dplyr") #package for Data-wrangling
if(!require("qualityTools")) install.packages("qualityTools") #package for Data-visualization
# Load packages:
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggpubr)
library(qualityTools)
cat("\f") # Clear old outputs
#------------------------------------------------
"SELECTION OF DATSET AND PARAMETERS"
#------------------------------------------------
# Choose a csv file
print(paste("Please select Input CSV", " The different samples in columns and the measured variables in the rows."), quote = FALSE)
fname <- file.choose() #choose all metaprotein.csv
#Choose the Separator for file
ask_sep <- as.character(readline(prompt = "ENTER the SEPARATOR for file(',' or ';') : "))
#file wtih mumerical data only
matrix <- read.csv(fname, sep=ask_sep, row.names = 1)
#Which type of test user will perform:
#Choose the Separator for file
ask_test <- as.character(readline(prompt = "ENTER 't' for t-test & 'u' for u-test: "))
#List for Start of the groups:
group_start <- list()
#List for End of the groups:
group_end <- list()
for (i in 1:2) {
group_start[i] <- as.integer(readline(prompt = "Enter values for Start of group: "))
group_end[i] <- as.integer(readline(prompt = "Enter values for End of group: "))
}
#Sets are paired or unpaired
PairedValue <- readline(prompt = "Enter TRUE/FALSE for Paired or Unpaired in the Test: ")
PairedValue <- as.logical(PairedValue)
#------------------------------------------------
"CALCULATIONS FOR T-TEST OR U-TEST"
#------------------------------------------------
if (ask_test == 't'){
"CALCULATIONS FOR T-TEST"
#File name of resulting csv
outputname = "Result_of_T_test.csv"
# Create new output vectors for the results:
EMPTY_First <- (rep("",nrow(matrix))) # + 1
pValue_TTest <- (rep(0, nrow(matrix))) # + 2
BonferoniCorrection_TTest <- (rep(0, nrow(matrix))) # + 3
BenjaminHochbergerCorrection_TTest <- (rep(0, nrow(matrix))) # + 4
EMPTY_Second <- (rep("", nrow(matrix)))# + 5
average_group_1 <- (rep(0, nrow(matrix))) # + 6
average_group_2 <- (rep(0, nrow(matrix))) # + 7
average_group_1div2 <- (rep(0, nrow(matrix))) # + 8
Log2_average_group_1div2 <- (rep(0, nrow(matrix))) # + 9
shapiro_Wvalue <- (rep(0, nrow(matrix))) # + 10
shapiro_Pvalue <- (rep(0, nrow(matrix))) # + 11
#Combined it to the output matrix:
newMatrix <- cbind( matrix, EMPTY_First,pValue_TTest, BonferoniCorrection_TTest, BenjaminHochbergerCorrection_TTest,
EMPTY_Second, average_group_1, average_group_2, average_group_1div2, Log2_average_group_1div2,
shapiro_Wvalue,shapiro_Pvalue)
#Normality Test"
#Shapiro test values to check normality:
for (i in 1:nrow(matrix)) {
newMatrix[i,ncol(matrix) + 10] <- shapiro.test(as.numeric(matrix[i,]))$statistic # w-value
newMatrix[i,ncol(matrix) + 11] <- shapiro.test(as.numeric(matrix[i,]))$p.value #p-value
}
# Calculate the T-TEST for all variables:
for (i in 1:nrow(matrix)) {
#Average of Groups:
newMatrix[i,ncol(matrix) + 6] <- mean(as.numeric( matrix[i, group_start[[1]] : group_end[[1]] ] ))
newMatrix[i,ncol(matrix) + 7] <- mean(as.numeric( matrix[i, group_start[[2]] : group_end[[2]] ] ))
newMatrix[i,ncol(matrix) + 8] <- mean( as.numeric(matrix[i, group_start[[1]] : group_end[[1]] ] ))/
mean(as.numeric( matrix[i, group_start[[2]] : group_end[[2]] ] ))
newMatrix[i,ncol(matrix) + 9] <- log2(newMatrix[i,ncol(matrix) + 8])
# Calculation of P-Value for t-Test:
newMatrix[i,ncol(matrix)+ 2] <- t.test(as.numeric(matrix[i,group_start[[1]] : group_end[[1]] ]),
as.numeric(matrix[i,group_start[[2]] : group_end[[2]] ]),
alternative = c("two.sided"), paired = PairedValue,
conf.level = 0.95)$p.value
}
# Bonferroni and Benjamin-Hochberger Corrections for T-Test:
#Save P-Value of T-Test in 'p':
p <- newMatrix[1:nrow(matrix),ncol(matrix)+ 2]
#apply Bonferroni correction:
newMatrix[1:nrow(matrix),ncol(matrix)+ 3] <- p.adjust(p,method = "bonferroni", n = length(p))
#apply Benjamin-Hochberger correction:
newMatrix[1:nrow(matrix),ncol(matrix)+ 4] <- p.adjust(p, method = "BH", n = length(p))
} else if( ask_test == 'u'){
# Output file name:
outputname = "Result_of_U_test.csv"
"CALCULATIONS FOR U-TEST"
# Create new output vectors for the results:
EMPTY_First <- (rep("",nrow(matrix))) # + 1
pValue_UTest <- (rep(0, nrow(matrix))) # + 2
BonferoniCorrection_UTest <- (rep(0, nrow(matrix))) # + 3
BenjaminHochbergerCorrection_UTest <- (rep(0, nrow(matrix))) # + 4
EMPTY_Second <- (rep("", nrow(matrix)))# + 5
average_group_1 <- (rep(0, nrow(matrix))) # + 6
average_group_2 <- (rep(0, nrow(matrix))) # + 7
average_group_1div2 <- (rep(0, nrow(matrix))) # + 8
Log2_average_group_1div2 <- (rep(0, nrow(matrix))) # + 9
shapiro_Wvalue <- (rep(0, nrow(matrix))) # + 10
shapiro_Pvalue <- (rep(0, nrow(matrix))) # + 11
# Combined it to the output matrix:
newMatrix <- cbind( matrix, EMPTY_First, pValue_UTest, BonferoniCorrection_UTest, BenjaminHochbergerCorrection_UTest,
EMPTY_Second, average_group_1, average_group_2, average_group_1div2, Log2_average_group_1div2,
shapiro_Wvalue,shapiro_Pvalue)
"Normality Test"
#Shapiro test values to check normality:
for (i in 1:nrow(matrix)) {
newMatrix[i,ncol(matrix) + 10] <- shapiro.test(as.numeric(matrix[i,]))$statistic # w-value
newMatrix[i,ncol(matrix) + 11] <- shapiro.test(as.numeric(matrix[i,]))$p.value #p-value
}
# Calculate the U-TEST for all variables:
for (i in 1:nrow(matrix)) {
#Average of Groups:
newMatrix[i,ncol(matrix) + 6] <- mean(as.numeric( matrix[i, group_start[[1]] : group_end[[1]] ] ))
newMatrix[i,ncol(matrix) + 7] <- mean(as.numeric( matrix[i, group_start[[2]] : group_end[[2]] ] ))
newMatrix[i,ncol(matrix) + 8] <- mean( as.numeric( matrix[i, group_start[[1]] : group_end[[1]] ] ))/
mean(as.numeric( matrix[i, group_start[[2]] : group_end[[2]] ] ))
newMatrix[i,ncol(matrix) + 9] <- log2(newMatrix[i,ncol(matrix) + 8])
# Calculation of P-Value for U-Test
newMatrix[i,ncol(matrix)+ 2] <- wilcox.test(as.numeric(matrix[i,group_start[[1]]:group_end[[1]]]),
as.numeric(matrix[i,group_start[[2]]:group_end[[2]]]),
alternative = c("two.sided"), paired = PairedValue,
conf.level = 0.95)$p.value
}
# Bonferroni and Benjamin-Hochberger Corrections for P-VALUES:
# Save P-Value of U-Test in 'p':
p <- newMatrix[1:nrow(matrix),ncol(matrix)+ 2]
#apply Bonferroni correction:
newMatrix[1:nrow(matrix),ncol(matrix)+ 3] <- p.adjust(p,method = "bonferroni", n = length(p))
#apply Benjamin-Hochberger correction:
newMatrix[1:nrow(matrix),ncol(matrix)+ 4] <- p.adjust(p, method = "BH", n = length(p))
}
#--------------------------------------------------
"VISUALIZATIONS FOR T-TEST OR U-TEST"
#--------------------------------------------------
if (ask_test == 't'){
"Visualization for Individual test"
#Scatter-Plot for T-Test according to P-Value:
pValueGreatTTest <- newMatrix %>% filter(pValue_TTest>=0.05) #filter matrix with p value >0.05
pVlaueLessTTest <- newMatrix %>% filter(pValue_TTest <= 0.05) #filter matrix with p value <0.05
# Visualization for T-Test:
ggplot()+geom_point(aes(x=pValueGreatTTest$average_group_1,y=pValueGreatTTest$average_group_2,color= '>0.05'))+
geom_point(aes(x=pVlaueLessTTest$average_group_1,y=pVlaueLessTTest$average_group_2,color= '<0.05'))+
geom_abline(aes(intercept=0,slope=1),color='blue')+xlab('Average of Group1')+ylab('Average of Group2')+labs(color='P-values')+
ggtitle("Group abandance w.r.t P-Value(T-Test)") + theme(plot.title = element_text(hjust = 0.5))
} else if( ask_test == 'u'){
#Scatter-Plot for U-Test according to P-Value:
pValueGreatUTest <- newMatrix %>% filter(pValue_UTest>=0.05) #filter matrix with p value >0.05
pVlaueLessUTest <- newMatrix %>% filter(pValue_UTest <= 0.05) #filter matrix with p value <0.05
# Visualization for U-Test:
ggplot()+geom_point(aes(x=pValueGreatUTest$average_group_1,y=pValueGreatUTest$average_group_2,color= '>0.05'))+
geom_point(aes(x=pVlaueLessUTest$average_group_1,y=pVlaueLessUTest$average_group_2,color= '<0.05'))+
geom_abline(aes(intercept=0,slope=1),color='blue')+xlab('Average of Group1')+ylab('Average of Group2')+labs(color='P-values')+
ggtitle("Group abandance w.r.t P-Value(U-Test)") + theme(plot.title = element_text(hjust = 0.5))
}
cat("\f") # Clear old outputs
#--------------------------------------------------
"EXPORT THE RESULTS INTO CSV"
#--------------------------------------------------
# Output file:
write.table(newMatrix, file = paste(outputname), append = FALSE, quote = TRUE, sep = ask_sep,
eol = "\n", na = "NA", dec = ".", row.names = TRUE,
col.names = NA, qmethod = c("escape", "double"),
fileEncoding = "")
print(paste("FINISHED"), quote = FALSE)
options(warn = -1)
cat("\f") # Clear old outputs