forked from roberta-davidson/PopGen-Scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
MDS_plot.R
92 lines (75 loc) · 3.34 KB
/
MDS_plot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
library(tidyverse)
library(ape)
library(vegan)
library(data.table)
library(ggrepel)
library(ade4)
library(stats)
library(adegenet)
library(phytools)
### Multi-Dimensional Scaling Plot from inverse matrix of pairwise f3 statistics ###
setwd("/path/f3_ind")
df=read.table("/path/f3_ind/data31_.R.qp3Pop.out",
col.names=c("IndA", "IndB", "PopC", "F3", "StdErr", "Z", "SNPs"))
labs=read.table("/path/f3_ind/data31_labels.txt",
col.names=c("Ind","Pop"))
#calculate inverse f3 stat
df <- df %>% mutate(inverse_F3=F3^-1) %>%
mutate(negF3=1-F3)
#### MDS plot ####
#extract cols for plot
MDS_f3 <- select(df,IndA,IndB,negF3)
MDS_f3 <- filter(MDS_f3, IndA!="USR1", IndB!="USR1") #remove Anzick
MDS_f3 <- filter(MDS_f3, IndA!="USR1", IndB!="USR1") #remove Anzick
#add pop labels to each individual
MDS_f3 <- full_join(MDS_f3,labs, by=c("IndA"="Ind"))
MDS_f3 <- full_join(MDS_f3,labs, by=c("IndB"="Ind"), suffix=c("A","B"))
#reomve pairs within same population (OPTIONAL!!)
MDS_f3 <- filter(MDS_f3, PopA!=PopB!)
#remove pop names again before pivoting
MDS_f3 <- select(MDS_f3,IndA,IndB,negF3)
#pivot wide
MDS <- pivot_wider(MDS_f3, names_from=IndA, values_from = negF3, values_fill = 0.000000)
#remove sample names column
MDS_matrix <- MDS[, -1]
#set format as matrix
MDS_matrix <- dist(t(as.matrix(MDS_matrix)))
#caluculate MDS
mds1 <- cmdscale(MDS_matrix, k=2, eig=TRUE)
#set output as dataframe
mds_n <- as.data.frame(mds1[["points"]])
setDT(mds_n, keep.rownames = TRUE)
#merge with pop labels
mds_labs <- full_join(mds_n,labs, by=c("rn"="Ind"))
#plot
plot <- ggplot(mds_labs, aes(x=V1, y=-V2, fill = mds_labs$Pop)) + #plot first two vectors of MDS
geom_point(size=4, shape=21) + #modify points
ggalt::geom_encircle(size=0, color=mds_labs$Pop, alpha=0.2) + #draw circles around populations
theme_light() +
scale_fill_manual(values = c("#5BC04B","#B59190","#00D89F","#7156AC","#FEBF04","black","#DA3487","#195FFF","red")) +
scale_color_manual(values = c("#5BC04B","#B59190","#00D89F","#7156AC","#FEBF04","black","#DA3487","#195FFF","red")) +
geom_text_repel(data = mds_labs, aes(label=rn), segment.size = 0.1, segment.color = "grey50") + #text labels
theme(panel.grid.major = element_blank(), #remove major gridline
panel.grid.minor = element_blank(), #remove minor gridline
axis.line=element_blank(),
axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.ticks=element_blank(),
panel.background=element_blank(),
panel.border=element_blank(),
plot.background=element_blank(),
legend.position = "right",
legend.key.size = unit(1, 'cm'), #change legend key size
legend.key.height = unit(1, 'cm'), #change legend key height
legend.key.width = unit(1, 'cm'), #change legend key width
legend.title = element_text(size=14), #change legend title font size
legend.text = element_text(size=14),
axis.title = element_text(size = 12)) + #change legend text font size
geom_hline(yintercept = 0, color = "black", alpha=0.5, size=0.5) +
geom_vline(xintercept = 0, color = "black", alpha=0.5, size=0.5) +
# scale_y_continuous(limits=c(-0.2,0.3)) +
# scale_x_continuous(limits=c(-0.4,0.3)) +
labs(x="Dimension 1", y= "Dimension 2", color="Population", fill="Population",
title = "Multidimensional Scaling Plot of 1-f3(Mbuti; Ind1, Ind2).")
plot
ggplotly(plot)