Skip to content

Commit

Permalink
Address #1013
Browse files Browse the repository at this point in the history
  • Loading branch information
PoisonAlien committed Apr 5, 2024
1 parent d63e0a2 commit eb36af4
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 17 deletions.
1 change: 0 additions & 1 deletion R/ClinicalEnrichment.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,6 @@ clinicalEnrichment = function(maf, clinicalFeature = NULL, annotationDat = NULL,
})

plist = data.table::rbindlist(l = plist, fill = TRUE)
print(plist)

pw.pvals = plist[Analysis %in% "Pairwise",.(Hugo_Symbol, Feature_1, Feature_2, n_mutated_Feature1, n_mutated_Feature2, fdr)][order(fdr)]
gw.pvals = plist[Analysis %in% "Group",.(Hugo_Symbol, Group1, Group2, n_mutated_group1, n_mutated_group2, p_value, OR, OR_low, OR_high)][order(p_value)]
Expand Down
17 changes: 2 additions & 15 deletions R/read_maf_dt.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#' @param gisticDelGenesFile Deletion Genes file generated by gistic. e.g; del_genes.conf_XX.txt, where XX is the confidence level. Default NULL.
#' @param gisticScoresFile scores.gistic file generated by gistic. Default NULL
#' @param cnLevel level of CN changes to use. Can be 'all', 'deep' or 'shallow'. Default uses all i.e, genes with both 'shallow' or 'deep' CN changes
#' @param cnTable Custom copynumber data if gistic results are not available. Input file or a data.frame should contain three columns in aforementioned order with gene name, Sample name and copy number status (either 'Amp' or 'Del'). Default NULL.
#' @param cnTable Custom copy-number data, for use when gistic results are not available. The input file or data.frame should contain three columns, in this order: gene name, sample name, and copy-number status (either 'Amp' or 'Del'). Default NULL. It is recommended to also include the additional columns `Chromosome`, `Start_Position` and `End_Position`.
#' @param isTCGA Is input MAF file from TCGA source. If TRUE uses only first 12 characters from Tumor_Sample_Barcode.
#' @param removeDuplicatedVariants removes repeated variants in a particular sample, mapped to multiple transcripts of the same gene. See Description. Default TRUE.
#' @param vc_nonSyn NULL. Provide manual list of variant classifications to be considered as non-synonymous. Rest will be considered as silent variants. Default uses Variant Classifications with High/Moderate variant consequences. https://m.ensembl.org/info/genome/variation/prediction/predicted_data.html: "Frame_Shift_Del", "Frame_Shift_Ins", "Splice_Site", "Translation_Start_Site","Nonsense_Mutation", "Nonstop_Mutation", "In_Frame_Del","In_Frame_Ins", "Missense_Mutation"
Expand Down Expand Up @@ -60,19 +60,6 @@ read.maf = function(maf, clinicalData = NULL, rmFlags = FALSE, removeDuplicatedV
skip = "Hugo_Symbol",
quote = ""
)

# if(as.logical(length(grep(pattern = 'gz$', x = maf, fixed = FALSE)))){
# #If system is Linux use fread, else use gz connection to read gz file.
# if(Sys.info()[['sysname']] == 'Windows'){
# maf.gz = gzfile(description = maf, open = 'r')
# suppressWarnings(maf <- data.table::as.data.table(read.csv(file = maf.gz, header = TRUE, sep = '\t', stringsAsFactors = FALSE, comment.char = "#")))
# close(maf.gz)
# } else{
# maf = suppressWarnings(data.table::fread(cmd = paste('zcat <', maf), sep = '\t', stringsAsFactors = FALSE, verbose = FALSE, data.table = TRUE, showProgress = TRUE, header = TRUE, fill = TRUE, skip = "Hugo_Symbol", quote = ""))
# }
# } else{
# suppressWarnings(maf <- data.table::fread(input = maf, sep = "\t", stringsAsFactors = FALSE, verbose = FALSE, data.table = TRUE, showProgress = TRUE, header = TRUE, fill = TRUE, skip = "Hugo_Symbol", quote = ""))
# }
}

#2. validate MAF file
Expand Down Expand Up @@ -179,7 +166,7 @@ read.maf = function(maf, clinicalData = NULL, rmFlags = FALSE, removeDuplicatedV
}else{
cnDat = data.table::fread(input = cnTable, sep = '\t', stringsAsFactors = FALSE, header = TRUE, colClasses = 'character')
}
colnames(cnDat) = c('Hugo_Symbol', 'Tumor_Sample_Barcode', 'Variant_Classification')
colnames(cnDat)[1:3] = c('Hugo_Symbol', 'Tumor_Sample_Barcode', 'Variant_Classification')
if(isTCGA){
cnDat[,Tumor_Sample_Barcode := substr(x = cnDat$Tumor_Sample_Barcode, start = 1, stop = 12)]
}
Expand Down
22 changes: 21 additions & 1 deletion R/subsetMaf.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#' @param isTCGA Is input MAF file from TCGA source.
#' @param dropLevels Default TRUE.
#' @param restrictTo restrict subset operations to these. Can be 'all', 'cnv', or 'mutations'. Default 'all'. If 'cnv' or 'mutations', subset operations will only be applied on copy-number or mutation data respectively, while retaining other parts as is.
#' @param keepNA Whether to keep rows with missing loci (`NA` positions) when sub-setting by `ranges`. Default `FALSE` removes such rows prior to overlapping. Set to `TRUE` to add them back to the result as is.
#' @return subset table or an object of class \code{\link{MAF-class}}
#' @seealso \code{\link{getFields}}
#' @examples
Expand All @@ -32,7 +33,7 @@
#'
#' @export

subsetMaf = function(maf, tsb = NULL, genes = NULL, query = NULL, clinQuery = NULL, ranges = NULL, mult = "first", fields = NULL, mafObj = TRUE, includeSyn = TRUE, isTCGA = FALSE, dropLevels = TRUE, restrictTo = 'all'){
subsetMaf = function(maf, tsb = NULL, genes = NULL, query = NULL, clinQuery = NULL, ranges = NULL, keepNA = FALSE, mult = "first", fields = NULL, mafObj = TRUE, includeSyn = TRUE, isTCGA = FALSE, dropLevels = TRUE, restrictTo = 'all'){

if(all(c(is.null(tsb), is.null(genes), is.null(query), is.null(ranges), is.null(clinQuery)))){
stop("Please provide sample names or genes or a query or ranges to subset by.")
Expand Down Expand Up @@ -150,9 +151,28 @@ subsetMaf = function(maf, tsb = NULL, genes = NULL, query = NULL, clinQuery = NU
maf.silent$Start_Position = as.numeric(as.character(maf.silent$Start_Position))
maf.silent$End_Position = as.numeric(as.character(maf.silent$End_Position))

#Remove NAs prior to foverlaps
na_pos = maf.dat[is.na(Start_Position) | is.na(End_Position)]
na_pos_silent = maf.silent[is.na(Start_Position) | is.na(End_Position)]

maf.dat = maf.dat[!is.na(Start_Position)][!is.na(Start_Position)]
maf.silent = maf.silent[!is.na(Start_Position)][!is.na(Start_Position)]

maf.dat = data.table::foverlaps(x = maf.dat, y = ranges, type = "within", nomatch = NULL, verbose = FALSE, mult = mult)
maf.silent = data.table::foverlaps(x = maf.silent, y = ranges, type = "within", nomatch = NULL, verbose = FALSE, mult = mult)
message(paste0(nrow(maf.dat)+nrow(maf.silent), " variants within provided ranges"))

if(keepNA){
maf.dat = data.table::rbindlist(l = list(maf.dat, na_pos), use.names = TRUE, fill = TRUE)
maf.silent = data.table::rbindlist(l = list(maf.silent, na_pos_silent), use.names = TRUE, fill = TRUE)
if(nrow(na_pos)+nrow(na_pos_silent) > 0){
warning("Added back ", nrow(na_pos)+nrow(na_pos_silent), " rows with no loci info.")
}
}else{
if(nrow(na_pos)+nrow(na_pos_silent) > 0){
warning("Removed ", nrow(na_pos)+nrow(na_pos_silent), " rows with no loci info.")
}
}
}


Expand Down
1 change: 1 addition & 0 deletions inst/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

## BUG FIXES
- MAJOR: `read.maf` by default coerces clinical data columns to character. This bug fix avoids it and is auto detected. Issue: [997](https://github.com/PoisonAlien/maftools/issues/997)
- Handle missing loci (`NA` positions) while sub-setting for ranges. Issue: [1013](https://github.com/PoisonAlien/maftools/issues/1013)

## ENHANCEMENTS
- Better sorting of oncoplot with `collapsePathway`
Expand Down
3 changes: 3 additions & 0 deletions man/subsetMaf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions vignettes/oncoplots.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ custom.cn.data = data.frame(
head(custom.cn.data)
# It's recommended to also include the additional columns Chromosome, Start_Position, End_Position
laml.plus.cn = read.maf(maf = laml.maf,
cnTable = custom.cn.data,
verbose = FALSE)
Expand Down

0 comments on commit eb36af4

Please sign in to comment.