diff --git a/DESCRIPTION b/DESCRIPTION index 12b7d99..183dae2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,7 +9,7 @@ Description: Functionality to create and characterize bipartite graphs that License: BSD_3_clause + file LICENSE Encoding: UTF-8 Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 Imports: BBmisc, graphics, diff --git a/NAMESPACE b/NAMESPACE index 8cf1ec7..9f1e91a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,15 +1,12 @@ # Generated by roxygen2: do not edit by hand -S3method(duplicated,dgCMatrix) export(Digest2) export(add_average_pep_ratio) export(add_uniqueness_attributes) export(aggregate_replicates) export(assign_protein_accessions) -export(calculateIsomorphList) export(calculate_peptide_ratios) export(calculate_subgraph_characteristics) -export(calculate_subgraph_characteristics_OLD) export(calculate_summary_table) export(collapse_edgelist) export(collapse_edgelist_quant) @@ -26,5 +23,5 @@ export(generate_quant_graphs) export(geom_mean) export(isomorphic_bipartite) export(plotBipartiteGraph) -export(plotIsomorphList) export(read_MQ_peptidetable) +importFrom(igraph,"%->%") diff --git a/R/OLD_helpers_calculate_and_plot_isomorphism_lists.R b/R/OLD_helpers_calculate_and_plot_isomorphism_lists.R deleted file mode 100644 index 59e4de9..0000000 --- a/R/OLD_helpers_calculate_and_plot_isomorphism_lists.R +++ /dev/null @@ -1,128 +0,0 @@ - -### TODO: neue Version davon, die Repräsentanten ausrechnet und diese dann zuordnet? -### TODO: use isomorphic function that considers node tpes - -#' Calculation of isomorphism lists from submatrizes or subgraphs -#' -#' @param G list of subgraphs -#' #' -#' @return isomorph_list is a list of indizes that belong in the same isomorphism class -#' Graphs are graph representatives -#' @export -#' -#' @examples -#' ### TODO -calculateIsomorphList <- function(G) { - - - isomorph_list <- list() - k <- 1 - - ### TODO: progress bar - - for (i in 1:length(G)) { - print(i) - G_tmp <- G[[i]] - if (k == 1) {isomorph_list[[k]] <- i; k <- k + 1; next} - - for (j in 1:(k-1)) { - iso <- igraph::isomorphic(G_tmp, G[[isomorph_list[[j]][1]]]) # compare graph with 1st element if each isomorphism class - if (iso) { - cG <- igraph::canonical_permutation(G) - cG <- igraph::permute(G_tmp, cG$labeling) - cG2 <- igraph::canonical_permutation(Graphs[[isomorph_list[[j]][1]]]) - cG2 <- igraph::permute(Graphs[[isomorph_list[[j]][1]]], cG2$labeling) - - iso2 <- all(igraph::V(cG)$type == igraph::V(cG2)$type) # test if graphs are really the same (considering the node types) - - if(iso2) { - isomorph_list[[j]] <- c(isomorph_list[[j]], i); break - } - - } - - - # same_nr_of_prot_and_pep <- sum(V(G)$type) == sum(V(Graphs[[isomorph_list[[j]][1]]])$type) # compare number of type 1 nodes - # if(iso&same_nr_of_prot_and_pep){isomorph_list[[j]] <- c(isomorph_list[[j]], i); break} # add graph to group of isomorphic graphs - } - - if (!iso) {isomorph_list[[k]] <- i; k <- k + 1; next} # if it is not isomorphic to an existing class, start a new one - - } - return(list(isomorph_list = isomorph_list, Graphs = Graphs)) -} - - - - - - -#### function that plots list of isomorph classes, sorted by number of occurences -## isomorph_list: result of calculateIsomorphList -## Graphs: list of graphs -## path: path to save plots -## title: if TRUE, title is added with number of occurence and percentage value -## pdf: if TRUE, plot is saved in a single odf, if FALSE as multiple pngs -## cex.title: size of title -## which_graphs: ranks of classes that should be plottet (e.g. 1:10 for top 10 classes) -## mfrow: mfrow agrument of par function to arrance graphs in one plot -## save: if TRUE, graphs will be saved as pdf or png -## ...: further arguments to plotBipartiteGraph - -#' function that plots list of isomorph classes, sorted by number of occurences -#' -#' @param isomorph_list result of calculateIsomorphList -#' @param Graphs list of graphs -#' @param path path to save plots -#' @param title if TRUE, title is added with number of occurence and percentage value -#' @param pdf if TRUE, plot is saved in a single odf, if FALSE as multiple pngs -#' @param cex.title size of title -#' @param which_graphs ranks of classes that should be plottet (e.g. 1:10 for top 10 classes) -#' @param mfrow mfrow agrument of par function to arrance graphs in one plot -#' @param save if TRUE, graphs will be saved as pdf or png -#' @param title_format "times+percent" or "percent" -#' @param ... further arguments to plotBipartiteGraph -#' @param height height of plot -#' @param width width of plot -#' -#' @return plots saved as a pdf or multiple png files -#' @export -#' -#' @examples -#' ### TODO -plotIsomorphList <- function(isomorph_list, Graphs, path, title = TRUE, pdf = TRUE, - cex.title = 1, which_graphs = NULL, mfrow = c(1,1), save = TRUE, - title_format = "times+percent", ..., height = 15, width = 15) { - - ord_le_iso <- order(lengths(isomorph_list), decreasing = TRUE) # order by number of occurrences - if (!is.null(which_graphs)) ord_le_iso <- ord_le_iso[which_graphs] - le_iso <- lengths(isomorph_list) # sizes of isomorph classes - le_iso_total <- sum(le_iso) # total number of graphs - percentages <- round(le_iso/le_iso_total * 100, 2) # percentages (proportion of all graphs) - - - - if(pdf & save) grDevices::pdf(paste0(path, ".pdf")) - graphics::par(mfrow = mfrow) - graphics::par(mai = c(0.1, 0.5, 0.4, 0), oma = c(0,0,0,0), mfrow = mfrow) - j <- 1 - for (i in ord_le_iso) { - if (!pdf & save) grDevices::png(paste0(path, "_", j, ".png"), res = 500, units = "cm", height = height, width = width) - ind <- isomorph_list[[i]][1] # plot first element for each isomorph group - G <- Graphs[[ind]] - types <- igraph::vertex_attr(G)$type # type = 0 peptides, type = 1 proteins - G <- igraph::set_vertex_attr(G, name = "name", value = c(1:sum(!types), LETTERS[1:sum(types)])) - - bppg::plotBipartiteGraph(G, vertex.label.dist = 0, ...) - if(title & title_format == "times+percent") title(paste0(le_iso[i], " times (", - formatC(percentages[i], digits = 2, format = "f"), "%)"), cex.main = cex.title, line = 0.5) - if(title & title_format == "percent") title(paste0(formatC(percentages[i], digits = 2, format = "f"), "%"), cex.main = cex.title, line = 0.5) - if(!pdf & save) grDevices::dev.off() - j <- j + 1 - } - graphics::par(mfrow = c(1,1)) - if(pdf & save) grDevices::dev.off() - - -} - diff --git a/R/OLD_helpers_calculate_and_plot_isomorphism_lists2.R b/R/OLD_helpers_calculate_and_plot_isomorphism_lists2.R deleted file mode 100644 index ca4c196..0000000 --- a/R/OLD_helpers_calculate_and_plot_isomorphism_lists2.R +++ /dev/null @@ -1,131 +0,0 @@ - -### TODO: neue Version davon, die Repräsentanten ausrechnet und diese dann zuordnet? -### TODO: use isomorphic function that considers node tpes - -#' Calculation of isomorphism lists from submatrizes or subgraphs -#' -#' @param Submatrix list of submatrizes or subgraphs -#' @param matrix Is submatrix a list of matrizes? -#' -#' @return isomorph_list is a list of indizes that belong in the same isomorphism class -#' Graphs are graph representatives -#' @export -#' -#' @examples -#' ### TODO -calculateIsomorphList <- function(Submatrix, matrix = TRUE) { - if (matrix) { - Graphs <- lapply(Submatrix, convertToBipartiteGraph) - } else { - Graphs <- Submatrix - } - - isomorph_list <- list() - k <- 1 - - for (i in 1:length(Graphs)) { - print(i) - G <- Graphs[[i]] - if (k == 1) {isomorph_list[[k]] <- i; k <- k + 1; next} - - for (j in 1:(k-1)) { - iso <- igraph::isomorphic(G, Graphs[[isomorph_list[[j]][1]]]) # compare graph with 1st element if each isomorphism class - if (iso) { - cG <- igraph::canonical_permutation(G) - cG <- igraph::permute(G, cG$labeling) - cG2 <- igraph::canonical_permutation(Graphs[[isomorph_list[[j]][1]]]) - cG2 <- igraph::permute(Graphs[[isomorph_list[[j]][1]]], cG2$labeling) - - iso2 <- all(igraph::V(cG)$type == igraph::V(cG2)$type) # test if graphs are really the same (considering the node types) - - if(iso2) { - isomorph_list[[j]] <- c(isomorph_list[[j]], i); break - } - - } - - - # same_nr_of_prot_and_pep <- sum(V(G)$type) == sum(V(Graphs[[isomorph_list[[j]][1]]])$type) # compare number of type 1 nodes - # if(iso&same_nr_of_prot_and_pep){isomorph_list[[j]] <- c(isomorph_list[[j]], i); break} # add graph to group of isomorphic graphs - } - - if (!iso) {isomorph_list[[k]] <- i; k <- k + 1; next} # if it is not isomorphic to an existing class, start a new one - - } - return(list(isomorph_list = isomorph_list, Graphs = Graphs)) -} - - - - - - -#### function that plots list of isomorph classes, sorted by number of occurences -## isomorph_list: result of calculateIsomorphList -## Graphs: list of graphs -## path: path to save plots -## title: if TRUE, title is added with number of occurence and percentage value -## pdf: if TRUE, plot is saved in a single odf, if FALSE as multiple pngs -## cex.title: size of title -## which_graphs: ranks of classes that should be plottet (e.g. 1:10 for top 10 classes) -## mfrow: mfrow agrument of par function to arrance graphs in one plot -## save: if TRUE, graphs will be saved as pdf or png -## ...: further arguments to plotBipartiteGraph - -#' function that plots list of isomorph classes, sorted by number of occurences -#' -#' @param isomorph_list result of calculateIsomorphList -#' @param Graphs list of graphs -#' @param path path to save plots -#' @param title if TRUE, title is added with number of occurence and percentage value -#' @param pdf if TRUE, plot is saved in a single odf, if FALSE as multiple pngs -#' @param cex.title size of title -#' @param which_graphs ranks of classes that should be plottet (e.g. 1:10 for top 10 classes) -#' @param mfrow mfrow agrument of par function to arrance graphs in one plot -#' @param save if TRUE, graphs will be saved as pdf or png -#' @param title_format "times+percent" or "percent" -#' @param ... further arguments to plotBipartiteGraph -#' @param height height of plot -#' @param width width of plot -#' -#' @return plots saved as a pdf or multiple png files -#' @export -#' -#' @examples -#' ### TODO -plotIsomorphList <- function(isomorph_list, Graphs, path, title = TRUE, pdf = TRUE, - cex.title = 1, which_graphs = NULL, mfrow = c(1,1), save = TRUE, - title_format = "times+percent", ..., height = 15, width = 15) { - - ord_le_iso <- order(lengths(isomorph_list), decreasing = TRUE) # order by number of occurrences - if (!is.null(which_graphs)) ord_le_iso <- ord_le_iso[which_graphs] - le_iso <- lengths(isomorph_list) # sizes of isomorph classes - le_iso_total <- sum(le_iso) # total number of graphs - percentages <- round(le_iso/le_iso_total * 100, 2) # percentages (proportion of all graphs) - - - - if(pdf & save) grDevices::pdf(paste0(path, ".pdf")) - graphics::par(mfrow = mfrow) - graphics::par(mai = c(0.1, 0.5, 0.4, 0), oma = c(0,0,0,0), mfrow = mfrow) - j <- 1 - for (i in ord_le_iso) { - if (!pdf & save) grDevices::png(paste0(path, "_", j, ".png"), res = 500, units = "cm", height = height, width = width) - ind <- isomorph_list[[i]][1] # plot first element for each isomorph group - G <- Graphs[[ind]] - types <- igraph::vertex_attr(G)$type # type = 0 peptides, type = 1 proteins - G <- igraph::set_vertex_attr(G, name = "name", value = c(1:sum(!types), LETTERS[1:sum(types)])) - - bppg::plotBipartiteGraph(G, vertex.label.dist = 0, ...) - if(title & title_format == "times+percent") title(paste0(le_iso[i], " times (", - formatC(percentages[i], digits = 2, format = "f"), "%)"), cex.main = cex.title, line = 0.5) - if(title & title_format == "percent") title(paste0(formatC(percentages[i], digits = 2, format = "f"), "%"), cex.main = cex.title, line = 0.5) - if(!pdf & save) grDevices::dev.off() - j <- j + 1 - } - graphics::par(mfrow = c(1,1)) - if(pdf & save) grDevices::dev.off() - - -} - diff --git a/R/generate_graphs_from_FASTA.R b/R/generate_graphs_from_FASTA.R index cfcadac..ddfac91 100644 --- a/R/generate_graphs_from_FASTA.R +++ b/R/generate_graphs_from_FASTA.R @@ -7,7 +7,7 @@ #' @param suffix suffix for saving results #' @param save_intermediate Save intermediate results? #' @param ... additional arguments to bppg::digest_fasta() - +#' @param prot_origin origin of protein, e.g. organism etc. #' #' @return subgraphs (i.e. connected components) from the graph generated from the FASTA file. #' @export diff --git a/R/generate_graphs_from_quantdata.R b/R/generate_graphs_from_quantdata.R index af348e0..2ccfcc5 100644 --- a/R/generate_graphs_from_quantdata.R +++ b/R/generate_graphs_from_quantdata.R @@ -3,6 +3,11 @@ #' @param peptide_ratios table with peptide ratios #' @param id_cols columns with ids, e.g. peptide sequences (everything except the peptide ratios) #' @param fasta_edgelist Edgelist created from the corresponding FASTA file +#' @param outpath output path +#' @param seq_column column name of the peptide sequence +#' @param collapse_protein_nodes if TRUE protein nodes will be collapsed +#' @param collapse_peptide_nodes if TRUE, peptide nodes will be collapsed +#' @param suffix suffix for output files #' #' @return list of list of subgraphs #' @export @@ -72,26 +77,37 @@ generate_quant_graphs <- function(peptide_ratios, id_cols = 1, fasta_edgelist, o #' (e.g. output from bppg::read_MQ_peptidetable) #' @param fasta fasta file used for identification of peptides in D #' @param outpath bla -#' @param normalize currently only loess normalization possible #' @param missed_cleavages bla #' @param min_aa bla #' @param max_aa bla #' @param ... currently not in use +#' @param id_columns column numbers of D that contain ID information (the rest should contain only peptide intensities, properly normalized) +#' @param seq_column column name of the peptide sequence +#' @param collapse_protein_nodes if TRUE protein nodes will be collapsed +#' @param collapse_peptide_nodes if TRUE, peptide nodes will be collapsed +#' @param suffix suffix for output files #' #' @return list of list of graphs #' @export #' #' @examples -generate_graphs_from_quant_data <- function(D, fasta, outpath = NULL, normalize = FALSE, - missed_cleavages = 2, min_aa = 6, max_aa = 50, - id_columns = 1, seq_column = "Sequence", - collapse_protein_nodes = TRUE, collapse_peptide_nodes = FALSE, +generate_graphs_from_quant_data <- function(D, + fasta, + outpath = NULL, + #normalize = FALSE, + missed_cleavages = 2, + min_aa = 6, + max_aa = 50, + id_columns = 1, + seq_column = "Sequence", + collapse_protein_nodes = TRUE, + collapse_peptide_nodes = FALSE, suffix = "", ...) { message("Digesting FASTA file...") digested_proteins <- bppg::digest_fasta(fasta, missed_cleavages = missed_cleavages, - min_aa = min_aa, max_aa = max_aa)#, ...) + min_aa = min_aa, max_aa = max_aa) message("Generating edgelist ...") edgelist <- bppg::generate_edgelist(digested_proteins) @@ -99,12 +115,9 @@ generate_graphs_from_quant_data <- function(D, fasta, outpath = NULL, normalize openxlsx::write.xlsx(edgelist, file = paste0(outpath, "edgelist_fasta_", suffix, ".xlsx"), overwrite = TRUE, keepNA = TRUE) } - # remove peptides outside the desired length range D <- D[nchar(D[, seq_column]) >= min_aa & nchar(D[, seq_column]) <= max_aa,] - - #normalize Intensities intensities <- D[,-id_columns] ### aggregate replicates by calculating the mean diff --git a/R/helpers-add_graph_attributes.R b/R/helpers-add_graph_attributes.R index bce9483..67fadfc 100644 --- a/R/helpers-add_graph_attributes.R +++ b/R/helpers-add_graph_attributes.R @@ -39,7 +39,8 @@ add_uniqueness_attributes <- function(G) { #' Adds average peptide ratios as a attribute to the graphs, if a list of peptide ratios is already present #' -#' @param G +#' @param G graph +#' @param type not used at the moment. Default is 'geom_mean' #' #' @return graphs with added attributes #' @export diff --git a/R/helpers-collapse_nodes_edgelist_quant_pepratio.R b/R/helpers-collapse_nodes_edgelist_quant_pepratio.R index a238838..28da55d 100644 --- a/R/helpers-collapse_nodes_edgelist_quant_pepratio.R +++ b/R/helpers-collapse_nodes_edgelist_quant_pepratio.R @@ -28,11 +28,11 @@ collapse_edgelist_quant <- function(edgelist, ### Calculate list if protein nodes if (collapse_protein_nodes) { ### aggregate peptide sequences that belong to the same protein accession (1 row per protein accession) - protEdges <- aggregate(data = edgelist, x = cbind(peptide, pep_ratio) ~ protein, function(x) paste(sort(unique(x)), collapse = ";")) + protEdges <- stats::aggregate(data = edgelist, x = cbind(peptide, pep_ratio) ~ protein, function(x) paste(sort(unique(x)), collapse = ";")) ### aggregate proteins with the same set of peptides (-> protein nodes) - protNodes <- aggregate(data = protEdges, x = protein ~ peptide+pep_ratio, function(x) paste(sort(unique(x)), collapse = ";")) + protNodes <- stats::aggregate(data = protEdges, x = protein ~ peptide+pep_ratio, function(x) paste(sort(unique(x)), collapse = ";")) } else { - protEdges <- aggregate(data = edgelist, x = peptide ~ protein, function(x) paste(sort(unique(x)), collapse = ";")) + protEdges <- stats::aggregate(data = edgelist, x = peptide ~ protein, function(x) paste(sort(unique(x)), collapse = ";")) protNodes <- protEdges } @@ -40,11 +40,11 @@ collapse_edgelist_quant <- function(edgelist, ### calculate list of peptide nodes if (collapse_peptide_nodes) { ### aggregate protein accessions belonging to the same peptide sequences (1 row per peptide sequence) - pepEdges <- aggregate(data = edgelist, x = protein ~ peptide + pep_ratio, function(x) paste(sort(unique(x)), collapse = ";")) + pepEdges <- stats::aggregate(data = edgelist, x = protein ~ peptide + pep_ratio, function(x) paste(sort(unique(x)), collapse = ";")) ### aggregate peptides with the same set of proteins (-> peptide nodes) - pepNodes <- aggregate(data = pepEdges, x = cbind(peptide, pep_ratio) ~ protein, function(x) paste(sort(unique(x)), collapse = ";")) + pepNodes <- stats::aggregate(data = pepEdges, x = cbind(peptide, pep_ratio) ~ protein, function(x) paste(sort(unique(x)), collapse = ";")) } else { - pepEdges <- aggregate(data = edgelist, x = protein ~ peptide + pep_ratio, function(x) paste(sort(unique(x)), collapse = ";"), simplify = FALSE) + pepEdges <- stats::aggregate(data = edgelist, x = protein ~ peptide + pep_ratio, function(x) paste(sort(unique(x)), collapse = ";"), simplify = FALSE) pepNodes <- pepEdges } diff --git a/R/helpers-generate_edgelist.R b/R/helpers-generate_edgelist.R index 66195b4..209c7e0 100644 --- a/R/helpers-generate_edgelist.R +++ b/R/helpers-generate_edgelist.R @@ -1,6 +1,7 @@ #' Generate edgelist from list of in silico digested proteins #' #' @param digested_proteins Output from digest_fasta() (List of vectors of peptide sequences) +#' @param prot_origin origin of the protein (e.g. organism, spike-in/background etc) #' #' @return edgelist #' @export diff --git a/R/helpers-isomorphisms.R b/R/helpers-isomorphisms.R index 8c9da9d..66e982a 100644 --- a/R/helpers-isomorphisms.R +++ b/R/helpers-isomorphisms.R @@ -28,25 +28,27 @@ isomorphic_bipartite <- function(graph1, graph2, ...) { #' Transform a bipartite graph into a directed graph #' -#' @param bip_graph -#' @param from_type TODO +#' @param bip_graph a bipartite graph +#' @param from_type determines if protein or peptide nodes are the "from" nodes #' #' @return a bipartite graph that is know directed #' @export #' #' @examples #' +#' @importFrom igraph %->% +#' direct_bipartite_graph <- function(bip_graph, from_type = FALSE){ # turn undirected into directed edges - bip_graph <- as.directed(bip_graph, mode = "arbitrary") + bip_graph <- igraph::as.directed(bip_graph, mode = "arbitrary") from_vertices <- igraph::V(bip_graph)[igraph::V(bip_graph)$type == from_type] to_vertices <- igraph::V(bip_graph)[igraph::V(bip_graph)$type == !from_type] # reverse edges going from the "to-group" to the "from-group" - bip_graph <- reverse_edges(bip_graph, igraph::E(bip_graph)[to_vertices %->% from_vertices]) + bip_graph <- igraph::reverse_edges(bip_graph, igraph::E(bip_graph)[to_vertices %->% from_vertices]) return(bip_graph) } diff --git a/R/helpers-normalization.R b/R/helpers-normalization.R deleted file mode 100644 index f7f4eb6..0000000 --- a/R/helpers-normalization.R +++ /dev/null @@ -1,184 +0,0 @@ - -# TODO: LTS Normalisierung einbauen, wie in stat_workflows! - - -automatedNormalization <- function(D, D.name = deparse(substitute(D)), - method = "loess", suffix = method, log = TRUE, id = NULL, - output_path = "", save = FALSE, - groupwise = FALSE, group = NULL) { - - if(method == "loess" | method == "quantile" | method == "median") { - - if(log) { - D <- log2(D) - } - - #### choose normalization function - fun <- switch(method, - "loess" = limma::normalizeBetweenArrays, - "quantile" = limma::normalizeBetweenArrays, - "median" = limma::normalizeBetweenArrays) - - ### choose arguments for normalization function - args <- switch(method, - "loess" = list(object = D, method = "cyclicloess"), - "quantile" = list(object = D, method = "quantile"), - "median" = list(object = D, method = "scale")) - - if (!groupwise) { - D_norm <- do.call(fun, args) - D_norm <- as.data.frame(D_norm) - } else { - D_split <- split.default(D, group) - D_split_norm <- lapply(D_split, limma::normalizeBetweenArrays, method = args$method) - D_norm <- do.call(cbind, D_split_norm) - } - - # if (length(id_columns) >= 1 ) { - D_norm_2 <- data.frame(id, D_norm) - # } - - # tryCatch(expr = { - if (save) { - openxlsx::write.xlsx(x = D_norm_2, file = paste0(output_path, D.name, "_", suffix, ".xlsx"), keepNA = TRUE, overwrite = TRUE) - message("Normalized data successfully saved!") - } - # }, - # error = function(err) { - # error handler picks up where error was generated - # print(paste("MY_ERROR: ",err)) - # beepr::beep(sound = 10) - ## user_input <- readline(prompt = paste0("+++ Do you want to overwrite ", paste0(DATA.name,"_",method,".xlsx"), "? +++ [yes/no] ")) - # if(user_input == "yes"){ - # write.xlsx(x = DATA_norm_2, file = paste0(output_path, DATA.name,"_",suffix,".xlsx"), overwrite = TRUE, keepNA = TRUE) - # message("Normalized data successfully saved!") - # } else { - # message("Overwriting of normalized data failed. Please allow overwriting, remove the data file or choose different normalization method!") - # } - # }) - }else{ # if method == "nonorm" - D_norm <- D - cat("No normalization applied.") - - # if (ncol(id_columns) >= 1 ) { - D_norm_2 <- data.frame(id, D_norm) - # } - openxlsx::write.xlsx(x = D_norm_2, file = paste0(output_path, D.name, "_", suffix, ".xlsx"), keepNA = TRUE) - } - - return(D_norm) -} - - - - - - - - - -#### Function for a single MA-Plot: -# x1: Sample 1 -# x2: Sample 2 -# log: Should data be log-transformed? -# TRUE, if not already log-transformed, FALSE, if already log-transformed -# alpha: Should points be transparent? -# col: colours of the data points -# ...: further arguments for ma.plot -MAPlot_single <- function(x1, x2, log = TRUE, alpha = FALSE, col = "black", ...) { - - if(log) { - x1 <- log2(x1) - x2 <- log2(x2) - } - if(alpha) { - col = alpha(col, 0.5) - } - - M <- stats::na.omit(x1 - x2) - A <- stats::na.omit((x1 + x2)/2) - - if (length(col) > 1) { - na.ind <- attr(M, "na.action") - col <- col[-na.ind] - } - - - affy::ma.plot(A = A, M = M, pch = 16, cex = 0.7, col = col, show.statistics = FALSE, ...) -} - - - -# function to check if user is sure to plot more than 1000 plots --> if yes it return 1 and the plots will be created -MAPlots_check <- function(X, maxPlots, ...){ - number_states <- max(as.integer(as.factor(colnames(X)))) - number_plots <- choose(number_states,2) - return_value <- 2 - - if(number_plots >= maxPlots){ - #beepr::beep(sound = 10) - user_input <- readline(prompt = paste("Are you sure you want to generate",number_plots,"MA-plots? [yes/no]")) - if (user_input == "yes"){ - return_value <- 1 - }else return_value <- 0 - }else return_value <- 1 - - return(return_value) -} - - - -### main function for MA-Plots -# X: Data in wide format -# labels: labels of the samples for the title of the MA-Plot -# labels2: second line in title, e.g. group membership -MAPlots <- function(X, log = TRUE, alpha = FALSE, suffix="nonorm", - labels = 1:ncol(X), labels2 = colnames(X), maxPlots = 5000, - plot_height=15, plot_width=15, output_path = "", ...) { - - require(limma) - require(affy) - require(scales) - require(beepr) - - number_states <- max(as.integer(as.factor(colnames(X)))) - number_plots <- choose(number_states,2) - - if(MAPlots_check(X, maxPlots) == 1){ - - num <- 0 - - print("Generating MA-Plots ...") - - ### TODO: auf pbapply umsteigen - pb <- utils::txtProgressBar(min = 0,max = number_plots,char = "#",style = 3) - - grDevices::pdf(paste0(output_path, "MA_Plots_", suffix, ".pdf"), height = plot_height/2.54, width = plot_width/2.54) - - for(i in 1:(ncol(X)-1)) { - for (j in (i + 1):ncol(X)) { - - if (is.null(labels2)) { - main = paste(labels[i], labels[j]) - } else { - main = paste(labels[i], labels[j], "\n", labels2[i], labels2[j]) - } - - num <- num + 1 - utils::setTxtProgressBar(pb, num) - - MAPlot_single(X[,i], X[, j], log = log, main = main, ...) - } - } - # sound chosen, "treasure", "facebook" also cool :) - #beepr::beep("coin") - close(pb) - print("MA-Plots finished!") - - grDevices::dev.off() - - } - -} - - diff --git a/R/helpers-preprocess_quant_peptide_data.R b/R/helpers-preprocess_quant_peptide_data.R index 9d54ebf..8823fbf 100644 --- a/R/helpers-preprocess_quant_peptide_data.R +++ b/R/helpers-preprocess_quant_peptide_data.R @@ -5,9 +5,10 @@ #' @param remove_contaminants If TRUE, peptide sequences from potential contaminants are removed #' @param rename_columns Rename columns? If TRUE, "Intensity." or "LFQ.intensity." are removed #' @param zeroToNA If TRUE, zeros are converted to NAs. -#' @param remove_empty_rows +#' @param remove_empty_rows If TRUE, rows with only NAs are removed. +#' @param further_columns_to_keep additional columns to keep, except peptide sequence and intensities #' -#' @return Dataframe with sequences and intensities +#' @return Data frame with sequences and intensities #' @export #' #' @examples @@ -31,7 +32,7 @@ read_MQ_peptidetable <- function(path, LFQ = FALSE, remove_contaminants = FALSE, } - ## search for itensity columns or LFQ values + ## search for intensity columns or LFQ values if(LFQ) { intensities <- D[, grep("LFQ", colnames(D))] if (rename_columns) colnames(intensities) <- stringr::str_replace(colnames(intensities), "LFQ.intensity.", "") @@ -151,6 +152,7 @@ foldChange <- function(D, X, Y, useNA = FALSE) { #' @param id_cols column numbers that contain peptide sequences etc (everything except intensities) #' @param group_levels levels of groups in the right order #' @param type "ratio" or "difference". Difference if values are already on log-scale +#' @param log_base log base #' #' @return data set with peptide ratios #' @export diff --git a/R/helpers-prototypeList.R b/R/helpers-prototypeList.R index 42663bd..c8397b7 100644 --- a/R/helpers-prototypeList.R +++ b/R/helpers-prototypeList.R @@ -2,6 +2,7 @@ #' #' #' @param G graph +#' @param sort_by_nr_edges logical, if TRUE, the list of prototypes is sorted by number of edges #' #' @return list of prototype graphs plus count #' @export @@ -41,7 +42,7 @@ generatePrototypeList <- function(G, sort_by_nr_edges = FALSE) { ind <- which(x) - # delete Graphs isomorphic to G_tmp graphs (-> list becomes smallet) + # delete Graphs isomorphic to G_tmp graphs (-> list becomes smaller) # G_tmp itself is a new isomorphism class. if (length(ind) > 0) { G <- G[-(ind+i)] diff --git a/R/helpers-small_helper_functions.R b/R/helpers-small_helper_functions.R index 113b533..bae203a 100644 --- a/R/helpers-small_helper_functions.R +++ b/R/helpers-small_helper_functions.R @@ -1,12 +1,13 @@ #' Geometric mean #' #' @param x vector with numbers +#' @param useprod if TRUE, prod(x)^(1/n) will be calculated, otherwise exp(mean(log(x))) #' #' @return geometric mean of the provided data points #' @export #' #' @examples # TODO -geom_mean <- function(x,useprod = FALSE) { +geom_mean <- function(x, useprod = FALSE) { n <- length(x) if (useprod) { diff --git a/R/normalizeCyclicLoess2.R b/R/normalizeCyclicLoess2.R deleted file mode 100644 index 71a1a52..0000000 --- a/R/normalizeCyclicLoess2.R +++ /dev/null @@ -1,231 +0,0 @@ -# affy::ma.plot -# function (A, M, subset = sample(1:length(M), min(c(10000, length(M)))), -# show.statistics = TRUE, span = 2/3, family.loess = "gaussian", -# cex = 2, plot.method = c("normal", "smoothScatter", -# "add"), add.loess = TRUE, lwd = 1, lty = 1, loess.col = "red", -# ...) -# { -# plot.method <- match.arg(plot.method) -# fn.call <- list(...) -# sigma <- IQR(M) -# mean <- median(M) -# if (!is.element("ylim", names(fn.call))) { -# yloc <- max(M) -# } -# else { -# yloc <- max(fn.call$ylim) -# } -# if (!is.element("xlim", names(fn.call))) { -# xloc <- max(A) -# } -# else { -# xloc <- max(fn.call$xlim) -# } -# if (plot.method == "smoothScatter") { -# plotmethod <- "smoothScatter" -# } -# else if (plot.method == "add") { -# plotmethod <- "add" -# } -# else { -# plotmethod <- "normal" -# } -# aux <- loess(M[subset] ~ A[subset], degree = 1, span = span, -# family = family.loess)$fitted -# if (plotmethod == "smoothScatter") { -# smoothScatter(A, M, ...) -# } -# else if (plotmethod == "add") { -# points(A, M, cex = cex, ...) -# } -# else { -# plot(A, M, cex = cex, ...) -# } -# if (add.loess) { -# o <- order(A[subset]) -# A <- A[subset][o] -# M <- aux[o] -# o <- which(!duplicated(A)) -# lines(approx(A[o], M[o]), col = loess.col, lwd = lwd, -# lty = lty) -# } -# abline(0, 0, col = "blue") -# if (show.statistics) { -# txt <- format(sigma, digits = 3) -# txt2 <- format(mean, digits = 3) -# text(xloc, yloc, paste(paste("Median:", txt2), -# paste("IQR:", txt), sep = "\n"), cex = cex, -# adj = c(1, 1)) -# } -# } - - -################################################################################ -################################################################################ - -loessFit2 <- function (y, x, weights = NULL, span = 0.3, iterations = 4L, - min.weight = 1e-05, max.weight = 1e+05, equal.weights.as.null = TRUE, - method = "weightedLowess") { - n <- length(y) - if (length(x) != n) - stop("y and x have different lengths") - out <- list(fitted = rep(NA, n), residuals = rep(NA, n)) - obs <- is.finite(y) & is.finite(x) - xobs <- x[obs] - yobs <- y[obs] - nobs <- length(yobs) - if (nobs == 0) - return(out) - if (span < 1/nobs) { - out$fitted[obs] <- y[obs] - out$residuals[obs] <- 0 - return(out) - } - if (min.weight < 0) - min.weight <- 0 - if (!is.null(weights)) { - if (length(weights) != n) - stop("y and weights have different lengths") - wobs <- weights[obs] - wobs[is.na(wobs)] <- 0 - wobs <- pmax(wobs, min.weight) - wobs <- pmin(wobs, max.weight) - if (equal.weights.as.null) { - r <- range(wobs) - if (r[2] - r[1] < 1e-15) - weights <- NULL - } - } - if (is.null(weights)) { - o <- order(xobs) - #lo <- lowess(x = xobs, y = yobs, f = span, iter = iterations - - # 1L) - lo <- loess(yobs ~ xobs, span = span, - degree = 1, parametric = FALSE, normalize = FALSE, - statistics = "approximate", surface = "direct", # interpolate - cell = 0.01/span, iterations = iterations, trace.hat = "approximate", - family = "gaussian") - - - # loess(M[subset] ~ A[subset], degree = 1, span = span, - # # family = family.loess) - - - out$fitted[obs][o] <- lo$y - out$residuals[obs] <- yobs - out$fitted[obs] - out$mod <- lo - out <<- out - lo <<- lo - return(out) - } - # if (min.weight > 0) - # nwobs <- nobs - # else nwobs <- sum(wobs > 0) - # if (nwobs < 4 + 1/span) { - # if (nwobs == 1L) { - # out$fitted[obs] <- yobs[wobs > 0] - # out$residuals[obs] <- yobs - out$fitted[obs] - # } - # else { - # fit <- lm.wfit(cbind(1, xobs), yobs, wobs) - # out$fitted[obs] <- fit$fitted - # out$residuals[obs] <- fit$residuals - # } - # return(out) - # } - # method <- match.arg(method, c("weightedLowess", "locfit", - # "loess")) - # switch(method, weightedLowess = { - # fit <- weightedLowess(x = xobs, y = yobs, weights = wobs, - # span = span, iterations = iterations, npts = 200) - # out$fitted[obs] <- fit$fitted - # out$residuals[obs] <- fit$residuals - # out$mod <- fit - # fit <<- fit - # }, locfit = { - # if (!requireNamespace("locfit", quietly = TRUE)) stop("locfit required but is not installed (or can't be loaded)") - # biweights <- rep(1, nobs) - # for (i in 1:iterations) { - # fit <- locfit::locfit(yobs ~ xobs, weights = wobs * - # biweights, alpha = span, deg = 1) - # res <- residuals(fit, type = "raw") - # s <- median(abs(res)) - # biweights <- pmax(1 - (res/(6 * s))^2, 0)^2 - # } - # out$fitted[obs] <- fitted(fit) - # out$residuals[obs] <- res - # out$mod <- fit - # }, loess = { - # oldopt <- options(warn = -1) - # on.exit(options(oldopt)) - # bin <- 0.01 - # fit <- loess(yobs ~ xobs, weights = wobs, span = span, - # degree = 1, parametric = FALSE, normalize = FALSE, - # statistics = "approximate", surface = "interpolate", - # cell = bin/span, iterations = iterations, trace.hat = "approximate") - # out$fitted[obs] <- fit$fitted - # out$residuals[obs] <- fit$residuals - # out$mod <- fit - # }) - # print(fit) - # out -} - - - -################################################################################ -################################################################################ -##affy:: normalizeCyclicLoess - -normalizeCyclicLoess2 <- function (x, weights = NULL, span = 0.7, iterations = 3, method = "fast", subset = NULL) { - if (is.null(subset)) { - subset <- 1:nrow(x) - } - - x <- as.matrix(x) - method <- match.arg(method, c("fast", "affy", - "pairs")) - n <- ncol(x) - if (method == "pairs") { - for (k in 1:iterations) for (i in 1:(n - 1)) for (j in (i + - 1):n) { - m <- x[, j] - x[, i] - a <- 0.5 * (x[, j] + x[, i]) - mod <- loessFit2(m[subset], a[subset], weights = weights, span = span, method = "loess") # "weightedLowess" - f <- stats:::predict.loess(mod$mod, a) # cbind(m, a) - x[, i] <- x[, i] + f/2 - x[, j] <- x[, j] - f/2 - } - } - if (method == "fast") { - for (k in 1:iterations) { - a <- rowMeans(x, na.rm = TRUE) - for (i in 1:n) { - m <- x[, i] - a - mod <- loessFit2(m[subset], a[subset], weights = weights, span = span, method = "loess")#$fitted - mod <<- mod - f <- stats:::predict.loess(mod$mod, a) - print(i) - print(summary(f)) - x[, i] <- x[, i] - f - } - } - } - # if (method == "affy") { - # g <- nrow(x) - # for (k in 1:iterations) { - # adjustment <- matrix(0, g, n) - # for (i in 1:(n - 1)) for (j in (i + 1):n) { - # m <- x[, j] - x[, i] - # a <- 0.5 * (x[, j] + x[, i]) - # mod <- loessFit2(m, a, weights = weights, span = span, method = "loess")#$fitted - # f <- stats:::predict.loess(mod$mod, cbind(m, a)) - # adjustment[, j] <- adjustment[, j] + f - # adjustment[, i] <- adjustment[, i] - f - # } - # x <- x - adjustment/n - # } - # } - x -} - diff --git a/R/plotBipartiteGraph.R b/R/plotBipartiteGraph.R index 8c903c0..5e920e8 100644 --- a/R/plotBipartiteGraph.R +++ b/R/plotBipartiteGraph.R @@ -20,14 +20,16 @@ #' @param node_labels_peptides "numbers" or "pep_ratios" or "pep_ratio_aggr" #' @param round_digits Number of digits to round the peptide ratios to. #' @param use_edge_attributes Use edge attributes for plotting (e.g. deleted edges will be dashed)? +#' @param legend.x x-coordinate of the legend. +#' @param legend.y y-coordinate of the legend. #' #' @return Plot of one bipartite graph. #' @export #' #' @examples #' biadjacency_matrix <- matrix(c(1,1,1,0), nrow = 2) -#' G <- igraph::graph_from_incidence_matrix(biadjacency_matrix) -#' plotBipartiteGraph(G, three_shapes = TRUE, useCanonicalPermutation = TRUE) +#' G <- igraph::graph_from_biadjacency_matrix(biadjacency_matrix) +#' #plotBipartiteGraph(G, three_shapes = TRUE, useCanonicalPermutation = TRUE) plotBipartiteGraph <- function(G, vertex.label.dist = 0, legend = TRUE, vertex.color = c("mediumseagreen", "cadetblue2", "coral1"), vertex.size = 15, vertex.label.cex = 1, edge.width = 1, vertex.size2=15, @@ -35,7 +37,7 @@ plotBipartiteGraph <- function(G, vertex.label.dist = 0, legend = TRUE, node_labels_proteins = "letters", node_labels_peptides = "numbers", round_digits = 2, use_edge_attributes = FALSE, - legend.x = NULL, legend.y = NULL, + legend.x = "bottom", legend.y = NULL, ...) { igraph::V(G)$type <- !igraph::V(G)$type # switch node types so that proteins are at the top @@ -56,7 +58,7 @@ plotBipartiteGraph <- function(G, vertex.label.dist = 0, legend = TRUE, names_G[Layout[,2] == 1] <- LETTERS[rank(pos_proteins)] } if (node_labels_proteins == "accessions") { - names_G[Layout[,2] == 1] <- limma::strsplit2(V(G)$name[Layout[,2] == 1], ";")[,1] + names_G[Layout[,2] == 1] <- limma::strsplit2(igraph::V(G)$name[Layout[,2] == 1], ";")[,1] } # nicht geordnete Zahlen if (node_labels_proteins == "numbers_noord") { @@ -70,11 +72,11 @@ plotBipartiteGraph <- function(G, vertex.label.dist = 0, legend = TRUE, names_G[Layout[,2] == 0] <- names_peptides[rank(pos_peptides)] } if (node_labels_peptides == "pep_ratios") { - pep_ratios <- V(G)$pep_ratio + pep_ratios <- igraph::V(G)$pep_ratio names_G[Layout[,2] == 0] <- round(pep_ratios[Layout[,2] == 0],round_digits) } if (node_labels_peptides == "pep_ratio_aggr") { - pep_ratios <- V(G)$pep_ratio_aggr + pep_ratios <- igraph::V(G)$pep_ratio_aggr names_G[Layout[,2] == 0] <- round(pep_ratios[Layout[,2] == 0],round_digits) } if (node_labels_peptides == "") { @@ -86,23 +88,6 @@ plotBipartiteGraph <- function(G, vertex.label.dist = 0, legend = TRUE, ################################# - - # if (node_labels == "letters+numbers") { - # names_G[Layout[,2] == 1] <- LETTERS[rank(pos_proteins)] - # names_peptides <- 1:sum(Layout[,2] == 0) - # names_G[Layout[,2] == 0] <- names_peptides[rank(pos_peptides)] - # - # G <- igraph::set_vertex_attr(G, name = "name", value = names_G) - # } - # if (node_labels == "peptide_ratios") { - # pep_ratios <- V(G)$pep_ratio - # names_G[Layout[,2] == 1] <- limma::strsplit2(V(G)$name[Layout[,2] == 1], ";")[,1] - # - # # names_peptides <- 1:sum(Layout[,2] == 0) - # names_G[Layout[,2] == 0] <- round(pep_ratios[Layout[,2] == 0],2) - # G <- igraph::set_vertex_attr(G, name = "name", value = names_G) - # } - type <- integer(length(igraph::V(G))) type[!igraph::V(G)$type] <- 1 # "protein" type[igraph::V(G)$type] <- 2 # "shared peptide" @@ -133,7 +118,7 @@ plotBipartiteGraph <- function(G, vertex.label.dist = 0, legend = TRUE, #if (legend) graphics::par(mar = c(10, 4, 4, 2) + 0.1) if (use_edge_attributes) { - edge.lty <- E(G)$deleted + 1 + edge.lty <- igraph::E(G)$deleted + 1 } else { edge.lty <- 1 } diff --git a/R/tables-subgraph_characteristics.R b/R/tables-subgraph_characteristics.R index 993550b..748967b 100644 --- a/R/tables-subgraph_characteristics.R +++ b/R/tables-subgraph_characteristics.R @@ -2,10 +2,8 @@ #' Generates a table with characteristics for each subgraph in a list. #' #' @param S list of subgraphs, where peptide and protein nodes are collapsed -#' @param S2 list of subgraphs, where only protein nodes are collapsed -#' @param S3 list of subgraphs, where nodes are not collapsed #' @param fastalevel Are the subgraphs on fasta level? -#' @param comparison name of comparison, for quantitative level (not in use currently) +#' @param prototype Are the subgraphs part of a prototype list? #' @param file where to save the table. #' #' @@ -14,10 +12,21 @@ #' #' @examples #' # TODO -calculate_subgraph_characteristics_OLD <- function(S, S2, S3, fastalevel = TRUE, comparison = NULL, file = NULL) { +calculate_subgraph_characteristics <- function(S, #S2, S3, + fastalevel = TRUE, + prototype = FALSE, + #comparison = NULL, + file = NULL) { + + if (prototype) { + counter <- S$counter + S <- S$graph + } Data <- NULL + + ### TODO: das kann man auch anders lösen, indem man guckt ob es ne liste ist? Dann würde das Argument wegfallen if (fastalevel) { comparisons <- 1 } else { @@ -28,12 +37,12 @@ calculate_subgraph_characteristics_OLD <- function(S, S2, S3, fastalevel = TRUE, if (fastalevel) { S_tmp <- S - S2_tmp <- S2 - S3_tmp <- S3 + # S2_tmp <- S2 + # S3_tmp <- S3 } else { S_tmp <- S[[j]] - S2_tmp <- S2[[j]] - S3_tmp <- S3[[j]] + # S2_tmp <- S2[[j]] + # S3_tmp <- S3[[j]] } print(comparisons[j]) @@ -46,49 +55,82 @@ calculate_subgraph_characteristics_OLD <- function(S, S2, S3, fastalevel = TRUE, for (i in 1:length(S_tmp)) { G_tmp <- S_tmp[[i]] - G2_tmp <- S2_tmp[[i]] - G3_tmp <- S3_tmp[[i]] - #S_tmp <- igraph::as_incidence_matrix(G_tmp) + nr_protein_nodes <- sum(igraph::V(G_tmp)$type) + nr_peptide_nodes <- sum(!igraph::V(G_tmp)$type) + nr_edges <- igraph::gsize(G_tmp) + + nr_edges_per_pep_node <- igraph::degree(G_tmp)[!igraph::V(G_tmp)$type] + nr_unique_peptides <- sum(nr_edges_per_pep_node == 1) + nr_shared_peptides <- sum(nr_edges_per_pep_node > 1) + + + protein_acc <- igraph::V(G_tmp)$name[igraph::V(G_tmp)$type] + protein_acc <- strsplit(protein_acc, ";") + nr_protein_accessions <- sum(sapply(protein_acc, length)) + + peptide_seq <- igraph::V(G_tmp)$name[!igraph::V(G_tmp)$type] + peptide_seq <- strsplit(peptide_seq, ";") + nr_peptide_sequences <- sum(sapply(peptide_seq, length)) + - nr_proteins <- sum(igraph::V(G_tmp)$type) - nr_peptides <- sum(!igraph::V(G_tmp)$type) - nr_edges <- igraph::gsize(G_tmp) - nr_edges_per_pep_node <- igraph::degree(G_tmp)[!igraph::V(G_tmp)$type] - nr_unique_peptides <- sum(nr_edges_per_pep_node == 1) - nr_shared_peptides <- sum(nr_edges_per_pep_node > 1) + unique_peptide_nodes <- igraph::V(G_tmp)[(igraph::degree(G_tmp) == 1 & !igraph::V(G_tmp)$type)] - nr_protein_accessions <- sum(igraph::V(G3_tmp)$type) - nr_peptide_sequences <- sum(!igraph::V(G2_tmp)$type) - nr_edges_per_pep_node2 <- igraph::degree(G2_tmp)[!igraph::V(G2_tmp)$type] - nr_peptide_sequences_unique <- sum(nr_edges_per_pep_node2 == 1) - nr_peptide_sequences_shared <- sum(nr_edges_per_pep_node2 > 1) + if (length(unique_peptide_nodes) == 0) { # Fall: keine uniquen Peptide im ganzen Graphen + nr_prot_node_only_unique_pep <- 0 + nr_prot_node_unique_and_shared_pep <- 0 + nr_prot_node_only_shared_pep <- nr_protein_nodes + } else { + if (length(unique_peptide_nodes) == 1 & nr_protein_nodes == 1) { # Fall: I-shaped graph + nr_prot_node_only_unique_pep <- 1 + nr_prot_node_unique_and_shared_pep <- 0 + nr_prot_node_only_shared_pep <- 0 + } else { + # neighborhood of the unique peptides (these are proteins with a unique peptide) + NH_of_unique_peptides <- igraph::ego(G_tmp, order = 1, mindist = 1, nodes = unique_peptide_nodes) - D_tmp <- data.frame(graph_ID = i, - nr_protein_nodes = nr_proteins, - nr_peptide_nodes = nr_peptides, - nr_unique_peptide_nodes = nr_unique_peptides, - nr_shared_peptide_nodes = nr_shared_peptides, - nr_edges = nr_edges, - nr_protein_accessions = nr_protein_accessions, - nr_peptide_sequences = nr_peptide_sequences, - nr_peptide_sequences_unique = nr_peptide_sequences_unique, - nr_peptide_sequences_shared = nr_peptide_sequences_shared, - comparison = comparisons[j] + nr_prot_node_only_unique_pep <- 0 + nr_prot_node_unique_and_shared_pep <- length(NH_of_unique_peptides) # = Anzahl uniquer Peptide?? + nr_prot_node_only_shared_pep <- nr_protein_nodes - nr_prot_node_unique_and_shared_pep#length(NH_of_unique_peptides) + } + } - ) + ### TODO: add nr of unique and shared peptide sequences + ### TODO: add info about graph type (isomorphism list!) - Data <- rbind(Data, D_tmp) + D_tmp <- data.frame(graph_ID = i, + nr_protein_nodes = nr_protein_nodes, + nr_peptide_nodes = nr_peptide_nodes, + nr_unique_peptide_nodes = nr_unique_peptides, + nr_shared_peptide_nodes = nr_shared_peptides, + nr_edges = as.integer(nr_edges), + nr_protein_accessions = nr_protein_accessions, + nr_peptide_sequences = as.integer(nr_peptide_sequences), + # nr_peptide_sequences_unique = nr_peptide_sequences_unique, + # nr_peptide_sequences_shared = nr_peptide_sequences_shared, + nr_prot_node_only_unique_pep = nr_prot_node_only_unique_pep, + nr_prot_node_unique_and_shared_pep = nr_prot_node_unique_and_shared_pep, + nr_prot_node_only_shared_pep = nr_prot_node_only_shared_pep, + comparison = comparisons[j] - pbapply::setpb(pb, i) + ) + + Data <- rbind(Data, D_tmp) + + pbapply::setpb(pb, i) + } + #progress bar command + invisible(NULL) } - #progress bar command - invisible(NULL) -} + + if (prototype) { + Data <- cbind(Data, counter = counter) + } + if (!is.null(file)) openxlsx::write.xlsx(Data, file, overwrite = TRUE) return(Data) diff --git a/R/tables-subgraph_characteristics_NEW.R b/R/tables-subgraph_characteristics_NEW.R deleted file mode 100644 index f2d9053..0000000 --- a/R/tables-subgraph_characteristics_NEW.R +++ /dev/null @@ -1,151 +0,0 @@ - -#' Generates a table with characteristics for each subgraph in a list. -#' -#' @param S list of subgraphs, where peptide and protein nodes are collapsed -#' @param fastalevel Are the subgraphs on fasta level? -#' @param prototype Are the subgraphs part of a prototype list? -#' @param file where to save the table. -#' -#' -#' @return table -#' @export -#' -#' @examples -#' # TODO -calculate_subgraph_characteristics <- function(S, #S2, S3, - fastalevel = TRUE, - prototype = FALSE, - #comparison = NULL, - file = NULL) { - - if (prototype) { - counter <- S$counter - S <- S$graph - } - - # print(str(S, 1)) - #print(counter) - - Data <- NULL - - if (fastalevel) { - comparisons <- 1 - } else { - comparisons <- names(S) - } - - for (j in 1:length(comparisons)) { - - if (fastalevel) { - S_tmp <- S - # S2_tmp <- S2 - # S3_tmp <- S3 - } else { - S_tmp <- S[[j]] - # S2_tmp <- S2[[j]] - # S3_tmp <- S3[[j]] - } - - print(comparisons[j]) - - #add progress bar to loop - number_of_iterations <- length(S_tmp) - pb <- pbapply::startpb(0, length(S_tmp)) - on.exit(pbapply::closepb(pb)) - - for (i in 1:length(S_tmp)) { - - G_tmp <- S_tmp[[i]] - # G2_tmp <- S2_tmp[[i]] - # G3_tmp <- S3_tmp[[i]] - - - #S_tmp <- igraph::as_incidence_matrix(G_tmp) - - nr_protein_nodes <- sum(igraph::V(G_tmp)$type) - nr_peptide_nodes <- sum(!igraph::V(G_tmp)$type) - nr_edges <- igraph::gsize(G_tmp) - - nr_edges_per_pep_node <- igraph::degree(G_tmp)[!igraph::V(G_tmp)$type] - nr_unique_peptides <- sum(nr_edges_per_pep_node == 1) - nr_shared_peptides <- sum(nr_edges_per_pep_node > 1) - - - protein_acc <- igraph::V(G_tmp)$name[igraph::V(G_tmp)$type] - protein_acc <- strsplit(protein_acc, ";") - nr_protein_accessions <- sum(sapply(protein_acc, length)) - - peptide_seq <- igraph::V(G_tmp)$name[!igraph::V(G_tmp)$type] - peptide_seq <- strsplit(peptide_seq, ";") - nr_peptide_sequences <- sum(sapply(peptide_seq, length)) - - - - unique_peptide_nodes <- V(G_tmp)[(degree(G_tmp) == 1 & !V(G_tmp)$type)] - - if (length(unique_peptide_nodes) == 0) { # Fall: keine uniquen Peptide im ganzen Graphen - nr_prot_node_only_unique_pep <- 0 - nr_prot_node_unique_and_shared_pep <- 0 - nr_prot_node_only_shared_pep <- nr_protein_nodes - - } else { - if (length(unique_peptide_nodes) == 1 & nr_protein_nodes == 1) { # Fall: I-shaped graph - nr_prot_node_only_unique_pep <- 1 - nr_prot_node_unique_and_shared_pep <- 0 - nr_prot_node_only_shared_pep <- 0 - } else { - - # neighborhood of the unique peptides (these are proteins with a unique peptide) - NH_of_unique_peptides <- ego(G_tmp, order = 1, mindist = 1, nodes = unique_peptide_nodes) - - nr_prot_node_only_unique_pep <- 0 - nr_prot_node_unique_and_shared_pep <- length(NH_of_unique_peptides) # = Anzahl uniquer Peptide?? - nr_prot_node_only_shared_pep <- nr_protein_nodes - nr_prot_node_unique_and_shared_pep#length(NH_of_unique_peptides) - } - } - - - - - - - -### TODO: add nr of unique and shared peptide sequences -### TODO: add infor about graph type (isomorphism list!) - - - D_tmp <- data.frame(graph_ID = i, - nr_protein_nodes = nr_protein_nodes, - nr_peptide_nodes = nr_peptide_nodes, - nr_unique_peptide_nodes = nr_unique_peptides, - nr_shared_peptide_nodes = nr_shared_peptides, - nr_edges = as.integer(nr_edges), - nr_protein_accessions = nr_protein_accessions, - nr_peptide_sequences = as.integer(nr_peptide_sequences), - # nr_peptide_sequences_unique = nr_peptide_sequences_unique, - # nr_peptide_sequences_shared = nr_peptide_sequences_shared, - nr_prot_node_only_unique_pep = nr_prot_node_only_unique_pep, - nr_prot_node_unique_and_shared_pep = nr_prot_node_unique_and_shared_pep, - nr_prot_node_only_shared_pep = nr_prot_node_only_shared_pep, - comparison = comparisons[j] - - ) - - Data <- rbind(Data, D_tmp) - - - - pbapply::setpb(pb, i) - } - #progress bar command - invisible(NULL) - } - - if (prototype) { - Data <- cbind(Data, counter = counter) - } - - if (!is.null(file)) openxlsx::write.xlsx(Data, file, overwrite = TRUE) - - return(Data) -} diff --git a/inst/extdata/uniprot_test.fasta b/inst/extdata/uniprot_test.fasta index 3da137a..7a4571d 100644 --- a/inst/extdata/uniprot_test.fasta +++ b/inst/extdata/uniprot_test.fasta @@ -80,3 +80,11 @@ FLQNLLSDERLCQSEALYAFLSPSPDYLKVIDVQGKKNSFSLSSFLERLPRDFFSHQEEE TEEDSDLSDYGDDVDGRKDALAEPCFMLIGEIFELRGMFKWVRRTLIALVQVTFGRTINK QIRDTVSWIFSEQMLVYYINIFRDAFWPNGKLAPPTTIRSKEQSQETKQRAQQKLLENIP DMLQSLVGQQNARHGIIKIFNALQETRANKHLLYALMELLLIELCPELRVHLDQLKAGQV +>sp|O43402|EMC8_HUMAN ER membrane protein complex subunit 8 OS=Homo sapiens OX=9606 GN=EMC8 PE=1 SV=1 +MPGVKLTTQAYCKMVLHGAKYPHCAVNGLLVAEKQKPRKEHLPLGGPGAHHTLFVDCIPL +FHGTLALAPMLEVALTLIDSWCKDHSYVIAGYYQANERVKDASPNQVAEKVASRIAEGFS +DTALIMVDNTKFTMDCVAPTIHVYEHHENRWRCRDPHHDYCEDWPEAQRISASLLDSRSY +ETLVDFDNHLDDIRNDWTNPEINKAVLHLC +>tr|M0R1B0|M0R1B0_HUMAN ER membrane protein complex subunit 8 (Fragment) OS=Homo sapiens OX=9606 GN=EMC8 PE=1 SV=1 +VASRIAEGFSDTALIMVDNTKFTMDCVAPTIHVYEHHENRWRCRDPHHDYCEDWPEAQRI +SASLLDSRSYETLVDFDNHLDDIRNDWTNPEINKAVLHLC diff --git a/man/add_average_pep_ratio.Rd b/man/add_average_pep_ratio.Rd index 8206568..a42ee7f 100644 --- a/man/add_average_pep_ratio.Rd +++ b/man/add_average_pep_ratio.Rd @@ -7,7 +7,9 @@ add_average_pep_ratio(G, type = "geom_mean") } \arguments{ -\item{G}{} +\item{G}{graph} + +\item{type}{not used at the moment. Default is 'geom_mean'} } \value{ graphs with added attributes diff --git a/man/calculateIsomorphList.Rd b/man/calculateIsomorphList.Rd deleted file mode 100644 index 770fc25..0000000 --- a/man/calculateIsomorphList.Rd +++ /dev/null @@ -1,35 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helpers_calculate_and_plot_isomorphism_lists.R, -% R/helpers_calculate_and_plot_isomorphism_lists2.R -\name{calculateIsomorphList} -\alias{calculateIsomorphList} -\title{Calculation of isomorphism lists from submatrizes or subgraphs} -\usage{ -calculateIsomorphList(Submatrix, matrix = TRUE) - -calculateIsomorphList(Submatrix, matrix = TRUE) -} -\arguments{ -\item{Submatrix}{list of submatrizes or subgraphs} - -\item{matrix}{Is submatrix a list of matrizes?} - -\item{G}{list of subgraphs -#'} -} -\value{ -isomorph_list is a list of indizes that belong in the same isomorphism class -Graphs are graph representatives - -isomorph_list is a list of indizes that belong in the same isomorphism class -Graphs are graph representatives -} -\description{ -Calculation of isomorphism lists from submatrizes or subgraphs - -Calculation of isomorphism lists from submatrizes or subgraphs -} -\examples{ -### TODO -### TODO -} diff --git a/man/calculate_peptide_ratios.Rd b/man/calculate_peptide_ratios.Rd index 24bb6cb..80c07c2 100644 --- a/man/calculate_peptide_ratios.Rd +++ b/man/calculate_peptide_ratios.Rd @@ -4,7 +4,13 @@ \alias{calculate_peptide_ratios} \title{Calculation of peptide ratios from aggregated intensities} \usage{ -calculate_peptide_ratios(aggr_intensities, id_cols = 1, group_levels = NULL) +calculate_peptide_ratios( + aggr_intensities, + id_cols = 1, + group_levels = NULL, + type = "ratio", + log_base = 10 +) } \arguments{ \item{aggr_intensities}{result from function aggregate_replicates} @@ -12,6 +18,10 @@ calculate_peptide_ratios(aggr_intensities, id_cols = 1, group_levels = NULL) \item{id_cols}{column numbers that contain peptide sequences etc (everything except intensities)} \item{group_levels}{levels of groups in the right order} + +\item{type}{"ratio" or "difference". Difference if values are already on log-scale} + +\item{log_base}{log base} } \value{ data set with peptide ratios @@ -19,6 +29,3 @@ data set with peptide ratios \description{ Calculation of peptide ratios from aggregated intensities } -\examples{ -## TODO -} diff --git a/man/calculate_subgraph_characteristics.Rd b/man/calculate_subgraph_characteristics.Rd index 7efff68..91ae9be 100644 --- a/man/calculate_subgraph_characteristics.Rd +++ b/man/calculate_subgraph_characteristics.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tables-subgraph_characteristics_NEW.R +% Please edit documentation in R/tables-subgraph_characteristics.R \name{calculate_subgraph_characteristics} \alias{calculate_subgraph_characteristics} \title{Generates a table with characteristics for each subgraph in a list.} diff --git a/man/calculate_subgraph_characteristics_OLD.Rd b/man/calculate_subgraph_characteristics_OLD.Rd deleted file mode 100644 index 8c1949a..0000000 --- a/man/calculate_subgraph_characteristics_OLD.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/tables-subgraph_characteristics.R -\name{calculate_subgraph_characteristics_OLD} -\alias{calculate_subgraph_characteristics_OLD} -\title{Generates a table with characteristics for each subgraph in a list.} -\usage{ -calculate_subgraph_characteristics_OLD( - S, - S2, - S3, - fastalevel = TRUE, - comparison = NULL, - file = NULL -) -} -\arguments{ -\item{S}{list of subgraphs, where peptide and protein nodes are collapsed} - -\item{S2}{list of subgraphs, where only protein nodes are collapsed} - -\item{S3}{list of subgraphs, where nodes are not collapsed} - -\item{fastalevel}{Are the subgraphs on fasta level?} - -\item{comparison}{name of comparison, for quantitative level (not in use currently)} - -\item{file}{where to save the table.} -} -\value{ -table -} -\description{ -Generates a table with characteristics for each subgraph in a list. -} -\examples{ -# TODO -} diff --git a/man/convertToBipartiteGraph.Rd b/man/convertToBipartiteGraph.Rd index f7cab4c..42436dc 100644 --- a/man/convertToBipartiteGraph.Rd +++ b/man/convertToBipartiteGraph.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/helpers-convertToBipartiteGraph.R \name{convertToBipartiteGraph} \alias{convertToBipartiteGraph} -\title{Conversion of submatrizes to subgraphs.} +\title{Conversion of submatrices to subgraphs.} \usage{ convertToBipartiteGraph(x) } @@ -13,7 +13,7 @@ convertToBipartiteGraph(x) graph as igraph object } \description{ -Conversion of submatrizes to subgraphs. +Conversion of submatrices to subgraphs. } \examples{ M <- matrix(c(1,0,1,1), nrow = 2, byrow = TRUE) diff --git a/man/direct_bipartite_graph.Rd b/man/direct_bipartite_graph.Rd index e2bafcf..2ed441a 100644 --- a/man/direct_bipartite_graph.Rd +++ b/man/direct_bipartite_graph.Rd @@ -2,19 +2,18 @@ % Please edit documentation in R/helpers-isomorphisms.R \name{direct_bipartite_graph} \alias{direct_bipartite_graph} -\title{Title} +\title{Transform a bipartite graph into a directed graph} \usage{ direct_bipartite_graph(bip_graph, from_type = FALSE) } \arguments{ -\item{from_type}{TODO} +\item{bip_graph}{a bipartite graph} + +\item{from_type}{determines if protein or peptide nodes are the "from" nodes} } \value{ a bipartite graph that is know directed } \description{ -Title -} -\examples{ -# TODO +Transform a bipartite graph into a directed graph } diff --git a/man/duplicated.dgCMatrix.Rd b/man/duplicated.dgCMatrix.Rd deleted file mode 100644 index 3ec4dca..0000000 --- a/man/duplicated.dgCMatrix.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helpers-duplicated.dgCMatrix.R -\name{duplicated.dgCMatrix} -\alias{duplicated.dgCMatrix} -\title{duplicated() but for sparse dgCMatrix objects} -\usage{ -\method{duplicated}{dgCMatrix}(x, incomparables = FALSE, MARGIN, ...) -} -\arguments{ -\item{x}{A dgCMat object (sparse matrix generated by the Matrix package).} - -\item{incomparables}{no functionality} - -\item{MARGIN}{1 = rowwise, 2 = column wise.} - -\item{...}{no functionality} -} -\value{ -Logical vector indicating if the columns or rows of the matrix are duplicated. -} -\description{ -duplicated() but for sparse dgCMatrix objects -} -\examples{ - library(Matrix) - M <- Matrix::Matrix(c(0,0,1,0,0,1,1 ,0, 0,0), byrow = FALSE, nrow = 2) - duplicated(M, MARGIN = 2) - -} diff --git a/man/foldChange.Rd b/man/foldChange.Rd index 6a22de6..c958d9a 100644 --- a/man/foldChange.Rd +++ b/man/foldChange.Rd @@ -21,6 +21,3 @@ fold changes (Y/X) \description{ calculates peptide ratios for pairwise comparisons of groups (Y/X) } -\examples{ -### TODO -} diff --git a/man/generatePrototypeList.Rd b/man/generatePrototypeList.Rd index 3665aac..d341518 100644 --- a/man/generatePrototypeList.Rd +++ b/man/generatePrototypeList.Rd @@ -7,16 +7,13 @@ generatePrototypeList(G, sort_by_nr_edges = FALSE) } \arguments{ -\item{G}{bla} +\item{G}{graph} + +\item{sort_by_nr_edges}{logical, if TRUE, the list of prototypes is sorted by number of edges} } \value{ -bla +list of prototype graphs plus count } \description{ Generates a list of graph prototypes for the different isomorphism classes and } -\examples{ -# TODO - - -} diff --git a/man/generate_edgelist.Rd b/man/generate_edgelist.Rd index ebbe1a1..3cd654a 100644 --- a/man/generate_edgelist.Rd +++ b/man/generate_edgelist.Rd @@ -8,6 +8,8 @@ generate_edgelist(digested_proteins, prot_origin = NULL) } \arguments{ \item{digested_proteins}{Output from digest_fasta() (List of vectors of peptide sequences)} + +\item{prot_origin}{origin of the protein (e.g. organism, spike-in/background etc)} } \value{ edgelist @@ -23,7 +25,4 @@ digested_proteins <- digest_fasta(fasta) edgelist <- generate_edgelist(digested_proteins) - - - } diff --git a/man/generate_graphs_from_FASTA.Rd b/man/generate_graphs_from_FASTA.Rd index cea0af8..f623c05 100644 --- a/man/generate_graphs_from_FASTA.Rd +++ b/man/generate_graphs_from_FASTA.Rd @@ -22,12 +22,14 @@ generate_graphs_from_FASTA( \item{collapse_peptide_nodes}{collapse peptide nodes?} -\item{result_path}{path whereresults are saved. If NULL, results are not saved} +\item{result_path}{path where results are saved. If NULL, results are not saved} \item{suffix}{suffix for saving results} \item{save_intermediate}{Save intermediate results?} +\item{prot_origin}{origin of protein, e.g. organism etc.} + \item{...}{additional arguments to bppg::digest_fasta()} } \value{ diff --git a/man/generate_graphs_from_quant_data.Rd b/man/generate_graphs_from_quant_data.Rd index f3a52aa..40c3b22 100644 --- a/man/generate_graphs_from_quant_data.Rd +++ b/man/generate_graphs_from_quant_data.Rd @@ -8,7 +8,6 @@ generate_graphs_from_quant_data( D, fasta, outpath = NULL, - normalize = FALSE, missed_cleavages = 2, min_aa = 6, max_aa = 50, @@ -28,14 +27,22 @@ generate_graphs_from_quant_data( \item{outpath}{bla} -\item{normalize}{currently only loess normalization possible} - \item{missed_cleavages}{bla} \item{min_aa}{bla} \item{max_aa}{bla} +\item{id_columns}{column numbers of D that contain ID information (the rest should contain only peptide intensities, properly normalized)} + +\item{seq_column}{column name of the peptide sequence} + +\item{collapse_protein_nodes}{if TRUE protein nodes will be collapsed} + +\item{collapse_peptide_nodes}{if TRUE, peptide nodes will be collapsed} + +\item{suffix}{suffix for output files} + \item{...}{currently not in use} } \value{ @@ -44,6 +51,3 @@ list of list of graphs \description{ Generate graphs from quantitative peptide-level data } -\examples{ -# TODO -} diff --git a/man/generate_quant_graphs.Rd b/man/generate_quant_graphs.Rd index 0d2c200..2d1d47a 100644 --- a/man/generate_quant_graphs.Rd +++ b/man/generate_quant_graphs.Rd @@ -21,6 +21,16 @@ generate_quant_graphs( \item{id_cols}{columns with ids, e.g. peptide sequences (everything except the peptide ratios)} \item{fasta_edgelist}{Edgelist created from the corresponding FASTA file} + +\item{outpath}{output path} + +\item{seq_column}{column name of the peptide sequence} + +\item{collapse_protein_nodes}{if TRUE protein nodes will be collapsed} + +\item{collapse_peptide_nodes}{if TRUE, peptide nodes will be collapsed} + +\item{suffix}{suffix for output files} } \value{ list of list of subgraphs @@ -28,7 +38,3 @@ list of list of subgraphs \description{ Generate graphs from peptide ratio table, using an edgelist calculated on the fasta file } -\examples{ -### TODO: Einstellbar, ob Peptid-Knoten auch gemergt werden sollen (dann mit geom. Mittel als peptid-ratio). -#### Das funktioniert noch nicht!!! -} diff --git a/man/geom_mean.Rd b/man/geom_mean.Rd index ef60084..5070bf0 100644 --- a/man/geom_mean.Rd +++ b/man/geom_mean.Rd @@ -4,10 +4,12 @@ \alias{geom_mean} \title{Geometric mean} \usage{ -geom_mean(x) +geom_mean(x, useprod = FALSE) } \arguments{ \item{x}{vector with numbers} + +\item{useprod}{if TRUE, prod(x)^(1/n) will be calculated, otherwise exp(mean(log(x)))} } \value{ geometric mean of the provided data points diff --git a/man/isomorphic_bipartite.Rd b/man/isomorphic_bipartite.Rd index fb673f7..d1f02d3 100644 --- a/man/isomorphic_bipartite.Rd +++ b/man/isomorphic_bipartite.Rd @@ -21,8 +21,3 @@ TRUE if graphs are isomorphic, FALSE if not. Enchanced version of the igraph::isomorphic function that also considers the node type in bipartite graphs, e.g. that W- and M-shaped graphs are NOT isomorphic } -\examples{ -# TODO - - -} diff --git a/man/plotBipartiteGraph.Rd b/man/plotBipartiteGraph.Rd index 5fabc83..f7a62d2 100644 --- a/man/plotBipartiteGraph.Rd +++ b/man/plotBipartiteGraph.Rd @@ -19,6 +19,8 @@ plotBipartiteGraph( node_labels_peptides = "numbers", round_digits = 2, use_edge_attributes = FALSE, + legend.x = "bottom", + legend.y = NULL, ... ) } @@ -51,6 +53,10 @@ plotBipartiteGraph( \item{use_edge_attributes}{Use edge attributes for plotting (e.g. deleted edges will be dashed)?} +\item{legend.x}{x-coordinate of the legend.} + +\item{legend.y}{y-coordinate of the legend.} + \item{...}{Additional arguments for plot.igraph.} } \value{ @@ -61,6 +67,6 @@ Plotting of bipartite peptide-protein graphs. } \examples{ biadjacency_matrix <- matrix(c(1,1,1,0), nrow = 2) -G <- igraph::graph_from_incidence_matrix(biadjacency_matrix) -plotBipartiteGraph(G, three_shapes = TRUE, useCanonicalPermutation = TRUE) +G <- igraph::graph_from_biadjacency_matrix(biadjacency_matrix) +#plotBipartiteGraph(G, three_shapes = TRUE, useCanonicalPermutation = TRUE) } diff --git a/man/plotIsomorphList.Rd b/man/plotIsomorphList.Rd deleted file mode 100644 index a5b006f..0000000 --- a/man/plotIsomorphList.Rd +++ /dev/null @@ -1,80 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/helpers_calculate_and_plot_isomorphism_lists.R, -% R/helpers_calculate_and_plot_isomorphism_lists2.R -\name{plotIsomorphList} -\alias{plotIsomorphList} -\title{function that plots list of isomorph classes, sorted by number of occurences} -\usage{ -plotIsomorphList( - isomorph_list, - Graphs, - path, - title = TRUE, - pdf = TRUE, - cex.title = 1, - which_graphs = NULL, - mfrow = c(1, 1), - save = TRUE, - title_format = "times+percent", - ..., - height = 15, - width = 15 -) - -plotIsomorphList( - isomorph_list, - Graphs, - path, - title = TRUE, - pdf = TRUE, - cex.title = 1, - which_graphs = NULL, - mfrow = c(1, 1), - save = TRUE, - title_format = "times+percent", - ..., - height = 15, - width = 15 -) -} -\arguments{ -\item{isomorph_list}{result of calculateIsomorphList} - -\item{Graphs}{list of graphs} - -\item{path}{path to save plots} - -\item{title}{if TRUE, title is added with number of occurence and percentage value} - -\item{pdf}{if TRUE, plot is saved in a single odf, if FALSE as multiple pngs} - -\item{cex.title}{size of title} - -\item{which_graphs}{ranks of classes that should be plottet (e.g. 1:10 for top 10 classes)} - -\item{mfrow}{mfrow agrument of par function to arrance graphs in one plot} - -\item{save}{if TRUE, graphs will be saved as pdf or png} - -\item{title_format}{"times+percent" or "percent"} - -\item{...}{further arguments to plotBipartiteGraph} - -\item{height}{height of plot} - -\item{width}{width of plot} -} -\value{ -plots saved as a pdf or multiple png files - -plots saved as a pdf or multiple png files -} -\description{ -function that plots list of isomorph classes, sorted by number of occurences - -function that plots list of isomorph classes, sorted by number of occurences -} -\examples{ -### TODO -### TODO -} diff --git a/man/read_MQ_peptidetable.Rd b/man/read_MQ_peptidetable.Rd index d77d362..faba77e 100644 --- a/man/read_MQ_peptidetable.Rd +++ b/man/read_MQ_peptidetable.Rd @@ -25,10 +25,12 @@ read_MQ_peptidetable( \item{zeroToNA}{If TRUE, zeros are converted to NAs.} -\item{remove_empty_rows}{} +\item{remove_empty_rows}{If TRUE, rows with only NAs are removed.} + +\item{further_columns_to_keep}{additional columns to keep, except peptide sequence and intensities} } \value{ -Dataframe with sequences and intensities +Data frame with sequences and intensities } \description{ Import of MaxQuant's peptide.txt-table diff --git a/tests/testthat/test-graph_generation_FASTA.R b/tests/testthat/test-graph_generation_FASTA.R index 8f31d12..b70adb1 100644 --- a/tests/testthat/test-graph_generation_FASTA.R +++ b/tests/testthat/test-graph_generation_FASTA.R @@ -1,6 +1,6 @@ test_that("digestion of a FASTA file", { - digested_proteins <- readRDS(test_path("testfiles/digested_proteins_test.rds")) + digested_proteins <- readRDS(testthat::test_path("testfiles/digested_proteins_test.rds")) file <- system.file("extdata", "uniprot_test.fasta", package = "bppg") fasta <- seqinr::read.fasta(file = file, seqtype = "AA", as.string = TRUE) @@ -12,9 +12,9 @@ test_that("digestion of a FASTA file", { test_that("generation of an edgelist", { - edgelist <- readRDS(test_path("testfiles/edgelist_test.rds")) + edgelist <- readRDS(testthat::test_path("testfiles/edgelist_test.rds")) - digested_proteins <- readRDS(test_path("testfiles/digested_proteins_test.rds")) + digested_proteins <- readRDS(testthat::test_path("testfiles/digested_proteins_test.rds")) res <- generate_edgelist(digested_proteins) expect_equal(res, edgelist) @@ -22,9 +22,9 @@ test_that("generation of an edgelist", { test_that("collapsing of edgelists", { - edgelist_coll_pept_prot <- readRDS(test_path("testfiles/edgelist_coll_pept_prot_test.rds")) + edgelist_coll_pept_prot <- readRDS(testthat::test_path("testfiles/edgelist_coll_pept_prot_test.rds")) - edgelist <- readRDS(test_path("testfiles/edgelist_test.rds")) + edgelist <- readRDS(testthat::test_path("testfiles/edgelist_test.rds")) res <- bppg::collapse_edgelist(edgelist, collapse_protein_nodes = TRUE, collapse_peptide_nodes = TRUE) @@ -32,9 +32,9 @@ test_that("collapsing of edgelists", { expect_equal(res, edgelist_coll_pept_prot) - edgelist_coll_prot <- readRDS(test_path("testfiles/edgelist_coll_prot_test.rds")) + edgelist_coll_prot <- readRDS(testthat::test_path("testfiles/edgelist_coll_prot_test.rds")) - edgelist <- readRDS(test_path("testfiles/edgelist_test.rds")) + edgelist <- readRDS(testthat::test_path("testfiles/edgelist_test.rds")) res2 <- bppg::collapse_edgelist(edgelist, collapse_protein_nodes = TRUE, collapse_peptide_nodes = FALSE) @@ -54,39 +54,36 @@ test_that("generation of graphs from edgelist", { edgelist_coll_pept_prot <- readRDS(testthat::test_path("testfiles/edgelist_coll_pept_prot_test.rds")) res <- bppg::generate_graphs_from_edgelist(edgelist_coll_pept_prot) - expect_true(bppg::isomorphic_bipartite(res[[1]], igraph::upgrade_graph(graphs_coll_pept_prot[[1]]))) - expect_true(bppg::isomorphic_bipartite(res[[2]], igraph::upgrade_graph(graphs_coll_pept_prot[[2]]))) + expect_true(bppg::isomorphic_bipartite(res[[1]], graphs_coll_pept_prot[[1]])) + expect_true(bppg::isomorphic_bipartite(res[[2]], graphs_coll_pept_prot[[2]])) + expect_true(bppg::isomorphic_bipartite(res[[3]], graphs_coll_pept_prot[[3]])) # with collapsing of only protein nodes edgelist_coll_prot <- readRDS(test_path("testfiles/edgelist_coll_prot_test.rds")) res2 <- bppg::generate_graphs_from_edgelist(edgelist_coll_prot) - expect_true(bppg::isomorphic_bipartite(res2[[1]], igraph::upgrade_graph(graphs_coll_prot[[1]]))) - expect_true(bppg::isomorphic_bipartite(res2[[2]], igraph::upgrade_graph(graphs_coll_prot[[2]]))) + expect_true(bppg::isomorphic_bipartite(res2[[1]], graphs_coll_prot[[1]])) + expect_true(bppg::isomorphic_bipartite(res2[[2]], graphs_coll_prot[[2]])) + expect_true(bppg::isomorphic_bipartite(res2[[3]], graphs_coll_prot[[3]])) - - # TODO: evtl ist es nicht ganz ideal hier mit einer Funktio aus bppg (isomorphic_bipartite) den Test zu machen - # evtl wegen alter igraph version (siehe testthat output?) }) test_that("subgraph characteristics table", { - ## TODO: evtl weiteres Beispiel mit einem Graph mit mind. einem Protein ohne uniques Peptid - expected <- data.frame( - graph_ID = c(1L,2L), - nr_protein_nodes = c(7L,1L), - nr_peptide_nodes = c(13L,1L), - nr_unique_peptide_nodes = c(7L,1L), - nr_shared_peptide_nodes = c(6L,0L), - nr_edges = c(22L,1L), - nr_protein_accessions = c(7L,1L), - nr_peptide_sequences = c(476L, 204L), - nr_prot_node_only_unique_pep = c(0,1), - nr_prot_node_unique_and_shared_pep = c(7,0), - nr_prot_node_only_shared_pep = c(0, 0), - comparison = c(1L,1L) + graph_ID = c(1L,2L,3L), + nr_protein_nodes = c(7L,1L,2L), + nr_peptide_nodes = c(13L,1L,2L), + nr_unique_peptide_nodes = c(7L,1L,1L), + nr_shared_peptide_nodes = c(6L,0L,1L), + nr_edges = c(22L,1L,3L), + nr_protein_accessions = c(7L,1L,2L), + nr_peptide_sequences = c(476L, 204L,47L), + nr_prot_node_only_unique_pep = c(0,1,0L), + nr_prot_node_unique_and_shared_pep = c(7L,0L,1L), + nr_prot_node_only_shared_pep = c(0L, 0L, 1L), + comparison = c(1L,1L, 1L) ) G <- readRDS(test_path("testfiles/graphs_coll_pept_prot_test.rds")) diff --git a/tests/testthat/testfiles/digested_proteins_test.RData b/tests/testthat/testfiles/digested_proteins_test.RData deleted file mode 100644 index f5b2fff..0000000 Binary files a/tests/testthat/testfiles/digested_proteins_test.RData and /dev/null differ diff --git a/tests/testthat/testfiles/digested_proteins_test.rds b/tests/testthat/testfiles/digested_proteins_test.rds index ecd1ea9..ac07fca 100644 Binary files a/tests/testthat/testfiles/digested_proteins_test.rds and b/tests/testthat/testfiles/digested_proteins_test.rds differ diff --git a/tests/testthat/testfiles/edgelist_coll__prot_test.rds b/tests/testthat/testfiles/edgelist_coll__prot_test.rds deleted file mode 100644 index e1b2089..0000000 Binary files a/tests/testthat/testfiles/edgelist_coll__prot_test.rds and /dev/null differ diff --git a/tests/testthat/testfiles/edgelist_coll_pept_prot_test.rds b/tests/testthat/testfiles/edgelist_coll_pept_prot_test.rds index 2e3b4f3..cab4f58 100644 Binary files a/tests/testthat/testfiles/edgelist_coll_pept_prot_test.rds and b/tests/testthat/testfiles/edgelist_coll_pept_prot_test.rds differ diff --git a/tests/testthat/testfiles/edgelist_coll_prot_test.rds b/tests/testthat/testfiles/edgelist_coll_prot_test.rds index 354c1c0..731fb6d 100644 Binary files a/tests/testthat/testfiles/edgelist_coll_prot_test.rds and b/tests/testthat/testfiles/edgelist_coll_prot_test.rds differ diff --git a/tests/testthat/testfiles/edgelist_test.rds b/tests/testthat/testfiles/edgelist_test.rds index e1b2089..4c15069 100644 Binary files a/tests/testthat/testfiles/edgelist_test.rds and b/tests/testthat/testfiles/edgelist_test.rds differ diff --git a/tests/testthat/testfiles/edgelist_test.rds.rds b/tests/testthat/testfiles/edgelist_test.rds.rds new file mode 100644 index 0000000..c110329 Binary files /dev/null and b/tests/testthat/testfiles/edgelist_test.rds.rds differ diff --git a/tests/testthat/testfiles/graphs_coll_pept_prot_test.rds b/tests/testthat/testfiles/graphs_coll_pept_prot_test.rds index 920e4ff..eb870a2 100644 Binary files a/tests/testthat/testfiles/graphs_coll_pept_prot_test.rds and b/tests/testthat/testfiles/graphs_coll_pept_prot_test.rds differ diff --git a/tests/testthat/testfiles/graphs_coll_prot_test.rds b/tests/testthat/testfiles/graphs_coll_prot_test.rds index 615a410..582f138 100644 Binary files a/tests/testthat/testfiles/graphs_coll_prot_test.rds and b/tests/testthat/testfiles/graphs_coll_prot_test.rds differ