Skip to content

Commit

Permalink
a corrected version taking the ancestors.
Browse files Browse the repository at this point in the history
add GO Ancestor
  • Loading branch information
ZW-xjtlu committed Apr 23, 2018
1 parent 915f2f1 commit 02a8481
Show file tree
Hide file tree
Showing 23 changed files with 441 additions and 498 deletions.
Binary file modified .DS_Store
Binary file not shown.
Binary file removed GOslim_gene_lst_BP.rds
Binary file not shown.
Binary file removed GOslim_gene_lst_CC.rds
Binary file not shown.
Binary file removed GOslim_gene_lst_MF.rds
Binary file not shown.
11 changes: 5 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
# Generated by roxygen2: do not edit by hand

export(GOEA)
export(GOslimEA)
export(exact2slim)
export(gene2go)
export(gene2goslim)
importFrom(AnnotationDbi,Term)
importFrom(AnnotationDbi,select)
importFrom(GSEABase,GOCollection)
importFrom(GSEABase,goSlim)
importFrom(dplyr,left_join)
importFrom(GO.db,GOBPANCESTOR)
importFrom(GO.db,GOCCANCESTOR)
importFrom(GO.db,GOMFANCESTOR)
importFrom(GO.db,GOTERM)
Binary file modified R/.DS_Store
Binary file not shown.
50 changes: 37 additions & 13 deletions R/GOEA.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,38 @@
#' @param orgDb an \code{OrgDb} object defined by AnnotationDbi.
#' @param category a character specifying the gene ontology category, can be one in "BP", "CC", and "MF", default "BP".
#' @param gene_key a character specifying the type of the gene ID, the available types of the keys can be find using \code{keytypes(org.Hs.eg.db)}, default "ENTREZID".
#' @param min_bg_count term minimum number of occurence in background gene set; default 1.
#' @param max_bg_count term maximum number of occurence in background gene set; default 1000.
#' @param min_bg_count term minimum number of occurence in background genes; default 1.
#' @param max_bg_count term maximum number of occurence in background genes; default Inf.
#' @param min_gs_count term minimum number of occurence in gene set genes; default 1.
#' @param max_gs_count term maximum number of occurence in gene set genes; default Inf.
#' @param EASE_Score whether or not use EASE score method. (a more conservative hypergeomatric test calculation used in DAVID)
#'for more details please refer to \url{https://david.ncifcrf.gov/helps/functional_annotation.html#fisher}, default FALSE.
#' @param pvalue_correction method used for multiple hypothesis adjustment, can be one in "holm", "hochberg", "hommel", "bonferroni", "BH", "BY","fdr", and "none".
#' @param interpret_term whether to let the GO term readable, default FALSE.
#' @param show_gene_name whether to attach readable gene names for each GO term, default FALSE.
#' @importFrom AnnotationDbi select
#' @param GO_Slim Wheather to run GSEA only on GO slim terms (a certain subset to GO terms), default FALSE.
#' @param Slim_ss A character sting of GO terms that define the scope of GO Slim. if not provided, the GO slim would be the generic subset defined in : \url{http://geneontology.org/ontology/subsets/goslim_generic.obo}
#' @param Exclude_self whether the GO slim terms of its own category i.e. remove terms of c("GO:0008150","GO:0005575","GO:0003674"), default TRUE; only applied when GO_Slim = TRUE.
#'
#' @importFrom AnnotationDbi select Term
#' @importFrom GO.db GOTERM
#' @export
GOEA <- function(gene_set,
back_ground,
orgDb,
category="BP",
gene_key = "ENTREZID",
min_bg_count = 1,
max_bg_count = 1000,
max_bg_count = Inf,
min_gs_count = 1,
max_gs_count = Inf,
EASE_Score= F,
pvalue_correction = "BH",
interpret_term = F,
show_gene_name = F,
attach_slim = F,
exclude_root_slim = F
GO_Slim = F,
Slim_ss = NULL,
Exclude_self = T
) {

if(any(duplicated(back_ground))) {
Expand All @@ -44,30 +54,43 @@ GOEA <- function(gene_set,

stopifnot(category %in% c("BP","CC","MF"))

GO_indx <- gene2go(back_ground,gene_key,orgDb,category)
GO_indx <- gene2go(Gene_ID = back_ground,
Gene_key_type = gene_key,
OrgDB = orgDb,
Category = category,
Slim = GO_Slim,
Slim_subset = Slim_ss,
Exclude_self_slim = Exclude_self)

GO_tb <- table(GO_indx$GO)

filter_go <- GO_tb >= min_bg_count & GO_tb <= max_bg_count

GO_indx <- GO_indx[ GO_indx$GO %in% (names(GO_tb)[filter_go]), ]
GO_indx <- GO_indx[ GO_indx$GO %in% (names(GO_tb)[filter_go]), ] #Drop the genes as well as terms not match to back ground filter

Freq_bg <- table( as.character( GO_indx$GO ) )

bg_genes_num <- sum(Freq_bg)

result_lst <- list()

for(i in 1:length(gene_set) ) {

Freq_gs <- table( as.character( GO_indx$GO [ GO_indx[[gene_key]] %in% gene_set[[i]] ] ) )
indx_match <- GO_indx[[gene_key]] %in% gene_set[[i]]

Freq_gs <- table( as.character( GO_indx$GO [ indx_match ] ) )

Freq_gs <- Freq_gs[ Freq_gs >= min_gs_count & Freq_gs <= max_gs_count ]

result_lst[[i]] <- gsea(
freq_gs = Freq_gs,
freq_bg = Freq_bg,
gs_total_gene = length(gene_set[[i]]),
bg_total_gene = length(back_ground),
gs_total_gene = sum(Freq_gs),
bg_total_gene = bg_genes_num,
adj_method = pvalue_correction,
ease = EASE_Score
)

}

if(show_gene_name) {
Expand All @@ -82,9 +105,10 @@ GOEA <- function(gene_set,
}

if(interpret_term) {
term_defs <- Term(GOTERM)
result_lst <- lapply(result_lst, function(x) {
x$term = GO_term_indx[x$term]
return(x)
x$definition = term_defs[x$term]
return(x[,c(1,7,2,3,4,5,6)])
})
}

Expand Down
109 changes: 0 additions & 109 deletions R/GOslimEA.R

This file was deleted.

40 changes: 0 additions & 40 deletions R/exact2slim.R

This file was deleted.

50 changes: 46 additions & 4 deletions R/gene2go.R
Original file line number Diff line number Diff line change
@@ -1,17 +1,26 @@
#' @title A function that map Gene IDs to GO terms.
#' @return a \code{data.frame} that define the mapping relationship.
#' @title A function that map Gene IDs to GO terms (including the ancestors).
#' @return a \code{data.frame} that define the mapping relationship between genes and GO terms.
#' @param Gene_ID gene ids to query.
#' @param Gene_key_type a character specifying the type of the ID, the available types of the keys can be find using \code{keytypes(org.Hs.eg.db)}, default "ENTREZID".
#' @param OrgDB an \code{OrgDb} object defined by AnnotationDbi.
#' @param Category a character specifying the gene ontology category, can be one in "BP", "CC", and "MF", default "BP".
#' @param Drop_zero_match whether to drop the genes that mapped to no terms at all, default FALSE.
#' @param Slim Wheather to return a mapping to GO slim terms (a certain subset to GO terms), default FALSE.
#' @param Slim_subset A character sting of GO terms that define the scope of GO Slim. if not provided, the GO slim would be the generic subset defined in : \url{http://geneontology.org/ontology/subsets/goslim_generic.obo}
#' @param Exclude_self_slim whether the GO slim terms of its own category i.e. remove terms of c("GO:0008150","GO:0005575","GO:0003674").
#'
#'
#' @importFrom AnnotationDbi select
#' @importFrom GO.db GOBPANCESTOR GOMFANCESTOR GOCCANCESTOR
#' @export
gene2go <- function(Gene_ID,
Gene_key_type = "ENTREZID",
OrgDB,
Category = "BP",
Drop_zero_match = F) {
Drop_zero_match = F,
Slim = F,
Slim_subset = NULL,
Exclude_self_slim = T) {

stopifnot(Category %in% c("BP","CC","MF"))

Expand All @@ -25,15 +34,48 @@ gene2go <- function(Gene_ID,

Map_result <- Map_result[,colnames(Map_result) %in% c(Gene_key_type,"GO")]

#Retrieve the ancestors

GO_pergene <- split(Map_result$GO,Map_result$ENTREZID)

ancest_lst <- as.list(eval( parse(text = paste0("GO",Category,"ANCESTOR")) ))

goancest_pergene <- lapply(GO_pergene, function(x) {
unique_ancest <- unique(unlist(ancest_lst[x],use.names = F))
return(unique_ancest[unique_ancest!="all"])
} )

if(Slim) {

if(is.null(Slim_subset)) {
Slim_subset <- slim_generic
}
if(Exclude_self_slim) {
Slim_subset <- Slim_subset[!Slim_subset %in% c("GO:0008150","GO:0005575","GO:0003674")]
}
goancest_pergene <- lapply(goancest_pergene, function(x) x[x%in%Slim_subset])
}

Map_result <- data.frame(
ENTREZID = rep(names(goancest_pergene),elementNROWS(goancest_pergene)),
GO = unlist(goancest_pergene)
)

if(!Drop_zero_match) {

df_attach <- data.frame( ENTREZID = Gene_ID[!Gene_ID %in% Map_result$ENTREZID],
indx_missing = !Gene_ID %in% Map_result$ENTREZID

if(sum(indx_missing) != 0) {

df_attach <- data.frame( ENTREZID = Gene_ID[indx_missing],
GO = NA)

Map_result <- rbind(Map_result,df_attach)

}

}

rownames(Map_result) = NULL

return(Map_result)
Expand Down
Loading

0 comments on commit 02a8481

Please sign in to comment.