Skip to content
This repository was archived by the owner on Mar 11, 2019. It is now read-only.

Commit f919844

Browse files
Luis Francisco Hernández SánchezLuis Francisco Hernández Sánchez
Luis Francisco Hernández Sánchez
authored and
Luis Francisco Hernández Sánchez
committed
Added analysis for g1 & g2 of mm graph.
1 parent b8e57b2 commit f919844

File tree

31 files changed

+452
-53
lines changed

31 files changed

+452
-53
lines changed

docs/statistics.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,8 @@ WITH DISTINCT protein, size(collect(ptmSet)) as ptmSetCount
141141
RETURN min(ptmSetCount), avg(ptmSetCount), max(ptmSetCount)
142142
~~~~
143143

144-
* Get all proteoforms: 13879
144+
* Get all proteoforms:
145+
145146
~~~~
146147
MATCH (pe:PhysicalEntity{speciesName:'Homo sapiens'})-[:referenceEntity]->(re:ReferenceEntity{databaseName:'UniProt'})
147148
WITH DISTINCT pe, re

resources/networks/degreeDistribution/make_percolation_analysis.R

Lines changed: 263 additions & 44 deletions
Large diffs are not rendered by default.

resources/networks/degreeDistribution/percolation.R

Lines changed: 185 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -135,10 +135,9 @@ GetBinsByFactor <- function(graph, factor = 0.9, type = "link") {
135135
return(breaks[!duplicated(breaks)])
136136
}
137137

138-
GetPercolationCurvePoints <- function(graph,
138+
GetPercolationCurvePoints <- function(graph, label,
139139
factor = 0.2,
140140
replicates = 5,
141-
entity = "Unknown",
142141
type = "link",
143142
verbose = TRUE) {
144143

@@ -149,6 +148,7 @@ GetPercolationCurvePoints <- function(graph,
149148
#'
150149
#' Args:
151150
#' graph: The graph in igraph format
151+
#' label: name for the objects in the graph
152152
#' factor: numeric value to define the breaks of the x-axis (size or order), depending on the percolation type argument
153153
#' replicates: Number of replicate measurements for each size
154154
#' entity: mm, pm, pp, proteins, proteoforms...
@@ -215,7 +215,7 @@ GetPercolationCurvePoints <- function(graph,
215215
}
216216
}
217217

218-
samples$Entity <- entity
218+
samples$Entity <- label
219219
names(samples) <- c("Size",
220220
"Order",
221221
"Completeness",
@@ -228,7 +228,80 @@ GetPercolationCurvePoints <- function(graph,
228228
return(samples)
229229
}
230230

231-
PlotPercolationCurve <- function(samples, showRelSize = TRUE, colors = c("blue3", "green3", "red3")) {
231+
GetSubcomponents <- function(graph,
                             factor = 0.2,
                             replicates = 5,
                             entity = "Unknown",
                             type = "link",
                             verbose = TRUE) {

  # Count how often each vertex lands in the lcc of random subgraphs ----
  #
  # For every replicate and every break returned by GetBinsByFactor, the graph
  # is reduced to a subgraph of that size (type "link") or order (any other
  # type) and the component returned by GetLcc is extracted. The sample is
  # assigned to group 1 when the size of that component is at least 25% of the
  # size of the full graph, and to group 2 otherwise. Every vertex of the
  # component gets its counter for that group incremented by one.
  #
  # Args:
  #   graph: The graph in igraph format. Vertices are keyed by
  #          as_ids(V(graph)) (assumes unique vertex names - TODO confirm).
  #   factor: numeric value forwarded to GetBinsByFactor to define the breaks
  #   replicates: number of times the whole series of breaks is sampled
  #   entity: not used by this function; kept so that existing calls that
  #           pass it keep working
  #   type: "link" removes edges with RemoveNEdges; any other value removes
  #         vertices with RemoveNVertices
  #   verbose: if TRUE, print progress and the measurements of every sample
  #
  # Returns:
  #   A data frame with one row per vertex and group: key (vertex id),
  #   value (number of samples of that group whose lcc contained the vertex)
  #   and Group ("1" or "2"). Rows of group 1 come first.

  # Breaks at or below this value end the sampling of a replicate.
  kMinBreak <- 10
  # Minimum lcc size, relative to the full graph, for a sample to be group 1.
  kGroupThreshold <- 0.25

  breaks <- GetBinsByFactor(graph = graph, factor = factor, type = type)

  # Loop invariants.
  full.size <- gsize(graph)
  full.order <- gorder(graph)

  # One integer counter per vertex and group, addressed by vertex id.
  vertex.ids <- unique(as.character(as_ids(V(graph))))
  g1 <- integer(length(vertex.ids))
  names(g1) <- vertex.ids
  g2 <- g1

  # Sample all sizes for each replicate
  for (r in seq_len(replicates)) {
    for (b in breaks) {
      # NOTE(review): `break` skips every remaining break of this replicate,
      # which presumably relies on the breaks being sorted in decreasing
      # order - confirm against GetBinsByFactor.
      if (b <= kMinBreak) {
        break
      }

      if (verbose) {
        cat("\n***** Replicate: ", r, "\t Break: ", b, " *****\n\n")
      }

      # Reduce the graph to a subgraph
      if (type == "link") {
        sg <- RemoveNEdges(graph, full.size - b)
      } else {
        sg <- RemoveNVertices(graph, full.order - b)
      }

      lcc <- GetLcc(sg)

      if (verbose) {
        completeness <- (gorder(sg) / full.order) * (gsize(sg) / full.size)
        cat("Subgraph size: ", gsize(sg), "\n")
        cat("Subgraph order: ", gorder(sg), "\n")
        cat("Completeness: ", completeness, "\n")
        cat("lcc size: ", gsize(lcc), "\n")
        cat("lcc order: ", gorder(lcc), "\n")
      }

      # Separate the groups
      members <- as.character(as_ids(V(lcc)))
      if (gsize(lcc) / full.size >= kGroupThreshold) {
        g1[members] <- g1[members] + 1L
      } else {
        g2[members] <- g2[members] + 1L
      }
    }
  }

  # Group is built with rep() so that a graph without vertices yields an
  # empty data frame instead of an error.
  df1 <- data.frame(key = names(g1),
                    value = unname(g1),
                    Group = rep("1", length(g1)),
                    row.names = NULL)
  df2 <- data.frame(key = names(g2),
                    value = unname(g2),
                    Group = rep("2", length(g2)),
                    row.names = NULL)

  rbind(df1, df2)
}
303+
304+
PlotPercolationCurve <- function(samples, showRelSize = TRUE, colors = c("blue3", "green3", "red3"), showScaled = FALSE) {
232305

233306
# Make percolation curve plot using point samples ----
234307
#'
@@ -257,9 +330,12 @@ PlotPercolationCurve <- function(samples, showRelSize = TRUE, colors = c("blue3"
257330
geom_line(data = means, aes(x=Completeness, y=RelativeOrderLcc, color = Entity))
258331
}
259332
p <- p + scale_color_manual(values = colors) +
260-
scale_x_log10() +
261-
ggtitle("Node Percolation curve approximation") +
333+
ggtitle("Percolation curve approximation") +
262334
theme(axis.text.x = element_text(angle = 90, hjust = 1))
335+
336+
if(showScaled)
337+
p <- p + scale_x_log10()
338+
263339
return(p)
264340
}
265341

@@ -323,4 +399,107 @@ GetMeasuresExtended <- function (graph, size) {
323399
subPT = GetPercolationThreshold(sg),
324400
subPT2 = GetPT(sg),
325401
lcc = gorder(GetLcc(sg)))
402+
}
403+
404+
MakePercolationAnalysis <- function(graphs, labels, data.path = "data/", plots.path = "plots/", factor = 0.1, replicates = 2, width = NULL) {

  # Make different combinations of percolation plots for a set of networks.
  #
  # For each percolation type ("link", then "node") it collects the samples of
  # GetPercolationCurvePoints for every graph, stores them in a csv file and
  # saves four plots made with PlotPercolationCurve as png files: relative
  # order and relative size of the lcc, each with a log10 and a linear x-axis.
  #
  # Args:
  #   graphs: non-empty list of igraph objects
  #   labels: atomic vector with a name for each graph
  #   data.path: where to store the csv files with the data. It is pasted
  #              directly in front of the file name, so it must end with a
  #              path separator.
  #   plots.path: where to store the plots in png format. Same convention as
  #               data.path.
  #   factor: numeric factor in (0, 1) to reduce the size (or order) of the graph
  #   replicates: integer number of replicates for a same subgraph
  #   width: width passed to ggsave. When NULL, a numeric variable `w` visible
  #          from this function is used if there is one (the previous
  #          behaviour); otherwise ggsave picks its default width.
  #
  # Returns:
  #   The data frame with the node percolation curve samples of all the
  #   graphs (the link percolation samples are only written to the csv file).

  stopifnot(identical(length(graphs), length(labels)))
  stopifnot(length(graphs) > 0)
  stopifnot(factor < 1 && factor > 0)

  if (is.null(width)) {
    width <- get0("w", mode = "numeric", ifnotfound = NA)
  }

  # Create the directory if it is missing; stop when it can not be created.
  EnsureDirectory <- function(path) {
    if (!dir.exists(path) &&
        !dir.create(path, showWarnings = FALSE, recursive = TRUE)) {
      stop("Could not create directory: ", path, call. = FALSE)
    }
  }

  # Gather the percolation curve samples of all the graphs for one type.
  CollectSamples <- function(type) {
    per.graph <- lapply(seq_along(graphs), function(i) {
      GetPercolationCurvePoints(graphs[[i]], labels[i], factor = factor, replicates = replicates, type = type)
    })
    do.call(rbind, per.graph)
  }

  EnsureDirectory(data.path)
  EnsureDirectory(plots.path)

  file.name <- paste(labels, collapse = "_")

  # Same order as the x-axis variants were produced before: log10, linear.
  x.scales <- c(log10 = TRUE, linear = FALSE)
  # Same order as the measures were plotted before: order, then size.
  measures <- c(relOrder = FALSE, relSize = TRUE)

  samples <- NULL
  for (type in c("link", "node")) {
    samples <- CollectSamples(type)
    write.csv(samples, paste0(data.path, file.name, "_", type, "_percolation_curve_approximation.csv"), row.names = FALSE, na = "")

    for (scale.name in names(x.scales)) {
      for (measure in names(measures)) {
        curve <- PlotPercolationCurve(samples,
                                      showRelSize = measures[[measure]],
                                      showScaled = x.scales[[scale.name]])
        # The plot is passed explicitly instead of relying on last_plot().
        ggsave(filename = paste0(plots.path, file.name, "_", type, "_", measure, "_", scale.name, "_percolation_curve_approximation.png"),
               plot = curve,
               width = width)
      }
    }
  }

  return(samples)
}
Loading
Loading
Loading
Loading
Loading
Loading
Loading

resources/networks/load_networks.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ source("graphs.R")
44

55
# Load protein data
66

7-
proteins.full.graph <- LoadGraph("datasets/all_proteins.tsv.gz")
8-
proteoforms.full.graph <- LoadGraph("datasets/all_proteoforms.tsv.gz")
7+
all.proteins.graph <- LoadGraph("datasets/all_proteins.tsv.gz")
8+
all.proteoforms.graph <- LoadGraph("datasets/all_proteoforms.tsv.gz")
99
pp.graph <- LoadGraph("datasets/pp.tsv.gz")
1010
mm.graph <- LoadGraph("datasets/mm.tsv.gz")
1111
pm.graph <- LoadGraph("datasets/pm.tsv.gz")

0 commit comments

Comments
 (0)