Skip to content

Commit

Permalink
clean up
Browse files Browse the repository at this point in the history
  • Loading branch information
Karin Schork committed Mar 25, 2024
1 parent b1c014f commit ba6131f
Show file tree
Hide file tree
Showing 13 changed files with 22 additions and 162 deletions.
12 changes: 2 additions & 10 deletions R/generate_graphs_from_FASTA.R
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
### TODO: Matrix package wird auf jeden Fall benötigt

#' Generate graphs from a FASTA file
#'
#' @param fasta fasta file, already read into R by seqinr::read.fasta
#' @param collapse_protein_nodes collapse protein nodes?
#' @param collapse_peptide_nodes collapse peptide nodes?
#' @param result_path path whereresults are saved. If NULL, results are not saved
#' @param result_path path where results are saved. If NULL, results are not saved
#' @param suffix suffix for saving results
#' @param save_intermediate Save intermediate results?
#' @param ... additional arguments to bppg::digest_fasta()
Expand All @@ -28,7 +26,7 @@ generate_graphs_from_FASTA <- function(fasta, collapse_protein_nodes = TRUE,
...) {

message("Digesting FASTA file...")
digested_proteins <- bppg::digest_fasta(fasta, ...)#, ...)
digested_proteins <- bppg::digest_fasta(fasta, ...)
message("Generating edgelist ...")
edgelist <- bppg::generate_edgelist(digested_proteins, prot_origin = prot_origin)
if (save_intermediate) {
Expand Down Expand Up @@ -72,9 +70,3 @@ generate_graphs_from_FASTA <- function(fasta, collapse_protein_nodes = TRUE,



###


###


24 changes: 2 additions & 22 deletions R/generate_graphs_from_quantdata.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


#' Generate graphs from peptide ratio table, using an edgelist calculated on the fasta file
#'
#' @param peptide_ratios table with peptide ratios
Expand All @@ -10,8 +8,6 @@
#' @export
#'
#' @examples
#' ### TODO: Einstellbar, ob Peptid-Knoten auch gemergt werden sollen (dann mit geom. Mittel als peptid-ratio).
#' #### Das funktioniert noch nicht!!!
generate_quant_graphs <- function(peptide_ratios, id_cols = 1, fasta_edgelist, outpath = NULL, seq_column = "Sequence",
collapse_protein_nodes = TRUE, collapse_peptide_nodes = FALSE, suffix = "") {

Expand All @@ -30,14 +26,9 @@ generate_quant_graphs <- function(peptide_ratios, id_cols = 1, fasta_edgelist, o
colnames_split <- limma::strsplit2(colnames(peptide_ratios), "_")
comparisons <- paste(colnames_split[,2], colnames_split[,3], sep = "_")


# graphs <- list()
subgraphs <- list()
#i = 3

### TODO: progress bar!
for (i in 1:ncol(peptide_ratios)) { #

for (i in 1:ncol(peptide_ratios)) {
comparison <- comparisons[i]

fc <- peptide_ratios[,i]
Expand All @@ -63,8 +54,6 @@ generate_quant_graphs <- function(peptide_ratios, id_cols = 1, fasta_edgelist, o
G[[j]] <- igraph::set_vertex_attr(graph = G[[j]], name = "pep_ratio",
index = igraph::V(G[[j]])[!igraph::V(G[[j]])$type],
value = edgelist_coll$pep_ratio[match(igraph::V(G[[j]])$name[!igraph::V(G[[j]])$type], edgelist_coll$peptide)])

#peptide_ratios[match(igraph::V(G[[j]])$name[!igraph::V(G[[j]])$type], id[, seq_column]), i]
}

subgraphs[[i]] <- G
Expand All @@ -77,9 +66,6 @@ generate_quant_graphs <- function(peptide_ratios, id_cols = 1, fasta_edgelist, o
}



### TODO: save end and intermediate results

#' Generate graphs from quantitative peptide-level data
#'
#' @param D data set with peptide sequence as first column and peptide intensities in subsequent columns
Expand All @@ -95,7 +81,7 @@ generate_quant_graphs <- function(peptide_ratios, id_cols = 1, fasta_edgelist, o
#' @return list of list of graphs
#' @export
#'
#' @examples # TODO
#' @examples
generate_graphs_from_quant_data <- function(D, fasta, outpath = NULL, normalize = FALSE,
missed_cleavages = 2, min_aa = 6, max_aa = 50,
id_columns = 1, seq_column = "Sequence",
Expand All @@ -115,17 +101,11 @@ generate_graphs_from_quant_data <- function(D, fasta, outpath = NULL, normalize


# remove peptides outside the desired length range
# TODO: remove also peptides with too many missed cleavages
D <- D[nchar(D[, seq_column]) >= min_aa & nchar(D[, seq_column]) <= max_aa,]


#normalize Intensities
intensities <- D[,-id_columns]
# TODO: auch Median, Quantilsnormalisierung und LFQ-Normalisierung von MaxQuant erlauben?
if (normalize) {
#intensities <- 2^limma::normalizeBetweenArrays(log2(intensities), method = "cyclicloess")
}


### aggregate replicates by calculating the mean
group <- factor(limma::strsplit2(colnames(intensities), "_")[,1])
Expand Down
3 changes: 0 additions & 3 deletions R/helpers-add_graph_attributes.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@



#' Adds vertex attributes with uniqueness of peptides and number of unique peptides
#' for proteins
#'
Expand Down
26 changes: 0 additions & 26 deletions R/helpers-assign_protein_accessions.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#' Assign protein accessions to a list of peptides, depending on a FASTA file.
#'
#' @param sequence vector of peptide sequences
Expand Down Expand Up @@ -43,28 +42,3 @@ assign_protein_accessions <- function(sequence, fasta_vec) {
return(unlist(assigned_proteins))
}

### TODO: wird abgelöst durch Grapherstellung über die Edgelist (Studienprojekt WS 22/23)

# sequence, fasta_vec

# protein_accessions <- names(fasta_vec)
#
# ### 1000 -> 12min
#
# x <- pblapply(sequence, function(x) {
# ind <- grep(paste0("(?:^M|K|R|^)(", x, ")(?=[^P]|$)"), fasta_vec, perl = TRUE)
# ### TODO: passt noch nicht ganz 100%ig, weil
# #proteins <- protein_accessions[ind]
# #proteins <- BBmisc::collapse(proteins, sep = "/")
# return(ind)
# }
# )
# #
# #
# #
# # table(x[[1]])
#
# # (?:^M|K|R|^)(WLSPEEVL)(?=[^P]|$)
#
# sequence[1:10]
# fasta_vec[x[[1]]]
8 changes: 2 additions & 6 deletions R/helpers-collapse_nodes_edgelist.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
### aus Studienprojekt WS 22/23
#' Collapsing of peptide and protein nodes of an edgelist.
#'
#' @param edgelist edgelist
Expand All @@ -18,9 +17,6 @@
#'


### TODO: Die Funktion funktioniert derzeit noch nicht, wenn z.B. die Proteinknoten schon collapsed sind!


collapse_edgelist <- function(edgelist,
collapse_protein_nodes = TRUE,
collapse_peptide_nodes = TRUE) {
Expand Down Expand Up @@ -57,11 +53,11 @@ collapse_edgelist <- function(edgelist,
#keep <- logical(nrow(edgelist2))

pepNodes2 <- pepNodes
pepNodes2$peptide <- limma::strsplit2(pepNodes2$peptide, ";")[,1] # erstes Peptid aus Liste!
pepNodes2$peptide <- limma::strsplit2(pepNodes2$peptide, ";")[,1] # first peptide from list
edgelist2 <- edgelist[edgelist$peptide %in% pepNodes2$peptide,]

protNodes2 <- protNodes
protNodes2$protein <- limma::strsplit2(protNodes2$protein, ";")[,1] # erstes Protein aus Liste!
protNodes2$protein <- limma::strsplit2(protNodes2$protein, ";")[,1] # first protein from list
edgelist3 <- edgelist2[edgelist2$protein %in% protNodes2$protein,]

edgelist4 <- edgelist3
Expand Down
10 changes: 2 additions & 8 deletions R/helpers-collapse_nodes_edgelist_quant_pepratio.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
### aus Studienprojekt WS 22/23
#' Collapsing of peptide and protein nodes of an edgelist.
#'
#' @param edgelist edgelist
Expand All @@ -18,9 +17,6 @@
#'


### TODO: Die Funktion funktioniert derzeit noch nicht, wenn z.B. die Proteinknoten schon collapsed sind!


collapse_edgelist_quant <- function(edgelist,
collapse_protein_nodes = TRUE,
collapse_peptide_nodes = TRUE) {
Expand Down Expand Up @@ -54,16 +50,14 @@ collapse_edgelist_quant <- function(edgelist,


edgelist2 <- edgelist
#keep <- logical(nrow(edgelist2))


pepNodes2 <- pepNodes
pepNodes2$peptide <- limma::strsplit2(pepNodes2$peptide, ";")[,1] # erstes Peptid aus Liste!
pepNodes2$peptide <- limma::strsplit2(pepNodes2$peptide, ";")[,1] # first peptide from list
edgelist2 <- edgelist[edgelist$peptide %in% pepNodes2$peptide,]


protNodes2 <- protNodes
protNodes2$protein <- limma::strsplit2(protNodes2$protein, ";")[,1] # erstes Protein aus Liste!
protNodes2$protein <- limma::strsplit2(protNodes2$protein, ";")[,1] # first protein from list
edgelist3 <- edgelist2[edgelist2$protein %in% protNodes2$protein,]

edgelist4 <- edgelist3
Expand Down
3 changes: 1 addition & 2 deletions R/helpers-convertToBipartiteGraph.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@

#' Conversion of submatrizes to subgraphs.
#' Conversion of submatrices to subgraphs.
#'
#' @param x element of a submatrix list
#'
Expand Down
6 changes: 0 additions & 6 deletions R/helpers-generate_edgelist.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@



#' Generate edgelist from list of in silico digested proteins
#'
#' @param digested_proteins Output from digest_fasta() (List of vectors of peptide sequences)
Expand All @@ -16,9 +13,6 @@
#' edgelist <- generate_edgelist(digested_proteins)
#'
#'
#'
#'
#'
generate_edgelist <- function(digested_proteins, prot_origin = NULL) {
#calculate necessary number of edges by counting the peptides belonging to each protein
mat_length <- sum(lengths(digested_proteins))
Expand Down
5 changes: 0 additions & 5 deletions R/helpers-generate_graphs_via_edgelist.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@


#' Generate bipartite peptide-protein graphs from a list of digested proteins via an edgelist
#'
#' @param edgelist Output from generate_edgelist (edgelist)
Expand All @@ -16,8 +14,6 @@
#' res <- generate_graphs_from_edgelist(edgelist)
#'

## TODO: weitere Spalten in Edgelist (z.B. protein_origin)

generate_graphs_from_edgelist <- function(edgelist) {

#generate graph from edge matrix
Expand All @@ -26,7 +22,6 @@ generate_graphs_from_edgelist <- function(edgelist) {
#assign vertex types to proteins and peptides for the graph to be bipartite
igraph::V(G)[igraph::V(G)$name %in% edgelist[,1]]$type <- TRUE
igraph::V(G)[igraph::V(G)$name %in% edgelist[,2]]$type <- FALSE
### TODO: export G

#decompose graph into connected components
subgraphs <- igraph::decompose(G)
Expand Down
47 changes: 4 additions & 43 deletions R/helpers-isomorphisms.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,64 +8,25 @@
#' @return TRUE if graphs are isomorphic, FALSE if not.
#' @export
#'
#' @examples # TODO
#' @examples
#'
#'

#graph1 <- G4
#graph2 <- G5


isomorphic_bipartite <- function(graph1, graph2, ...) {

#iso <- igraph::isomorphic(graph1, graph2)

## direct graphs if they are not directed yet
if (!igraph::is_directed(graph1)) graph1 <- bppg::direct_bipartite_graph(graph1)
if (!igraph::is_directed(graph2)) graph2 <- bppg::direct_bipartite_graph(graph2)
#graph2 <- bppg::direct_bipartite_graph(graph2)


# igraph::V(graph1)$color <- c("black", "white")[igraph::V(graph1)$type+1]
# igraph::V(graph2)$color <- c("black", "white")[igraph::V(graph2)$type+1]
iso <- igraph::isomorphic(graph1, graph2, method = "vf2")


iso <- igraph::isomorphic(graph1, graph2, method = "vf2")#, vertex.color1 = igraph::V(graph1)$color, vertex.color2 = igraph::V(graph2)$color)


# if(iso) {
# ## list all attributes except "type" to remove them before comparing the graphs
#
#
# cG1 <- igraph::canonical_permutation(graph1)#, colors = igraph::V(graph1)$type)
# cG1 <- igraph::permute(graph1, cG1$labeling)
#
# cG2 <- igraph::canonical_permutation(graph2)#, colors = igraph::V(graph2)$type)
# cG2 <- igraph::permute(graph2, cG2$labeling)
#
#
# attribute_list <- unique(c(igraph::vertex_attr_names(graph1), igraph::vertex_attr_names(graph2)))
# attribute_list <- attribute_list[!(attribute_list %in% c("type", "color"))]
#
#
# # if there are any attributes other than "type", they will be removed
# if (length(attribute_list) > 0) {
# for (i in 1:length(attribute_list)) {
# cG1 <- try({delete_vertex_attr(cG1, name = attribute_list[[i]])})
# cG2 <- try({delete_vertex_attr(cG2, name = attribute_list[[i]])})
# }
# }
#
# iso <- igraph::identical_graphs(cG1, cG2, attrs = FALSE)#all(igraph::V(cG1)$type == igraph::V(cG2)$type)
#
# }
return(iso)
}




#' Title
#' Transform a bipartite graph into a directed graph
#'
#' @param bip_graph bipartite graph to be transformed into a directed graph
#' @param from_type TODO
Expand All @@ -74,7 +35,7 @@ isomorphic_bipartite <- function(graph1, graph2, ...) {
#' @export
#'
#' @examples
#' # TODO
#'
direct_bipartite_graph <- function(bip_graph, from_type = FALSE){


Expand Down
16 changes: 4 additions & 12 deletions R/helpers-preprocess_quant_peptide_data.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@


### TODO: read in data directly from MaxQuant and filter unnecessary columns and decoys
### TODO: Normalization


#' Import of MaxQuant's peptide.txt-table
#'
#' @param path Path to the peptides.txt table
Expand Down Expand Up @@ -112,10 +106,8 @@ aggregate_replicates <- function(D, group, missing.limit = 0, method = "mean",

res_tmp <- FUN(X_tmp, na.rm = TRUE)

# if (!use0) {
missingx <- apply(X_tmp, 1, function(x) mean(is.na(x)))
res_tmp[missingx > missing.limit | missingx == 1] <- NA
# }
missingx <- apply(X_tmp, 1, function(x) mean(is.na(x)))
res_tmp[missingx > missing.limit | missingx == 1] <- NA

res <- cbind(res, res_tmp)
}
Expand All @@ -139,7 +131,7 @@ aggregate_replicates <- function(D, group, missing.limit = 0, method = "mean",
#' @export
#'
#' @examples
#' ### TODO
#'
foldChange <- function(D, X, Y, useNA = FALSE) {
FC <- D[, Y] / D[, X]

Expand All @@ -164,7 +156,7 @@ foldChange <- function(D, X, Y, useNA = FALSE) {
#' @export
#'
#' @examples
#' ## TODO
#'
calculate_peptide_ratios <- function(aggr_intensities, id_cols = 1,
group_levels = NULL, type = "ratio", log_base = 10) {

Expand Down
Loading

0 comments on commit ba6131f

Please sign in to comment.