Skip to content

Commit

Permalink
Adress #939 #940
Browse files Browse the repository at this point in the history
  • Loading branch information
PoisonAlien committed Jul 17, 2023
1 parent d4e5319 commit 2dce987
Show file tree
Hide file tree
Showing 7 changed files with 90 additions and 104 deletions.
22 changes: 22 additions & 0 deletions .github/workflows/stale_bot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: Close inactive issues
on:
schedule:
- cron: "30 1 * * *"

jobs:
close-issues:
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: write
steps:
- uses: actions/stale@v5
with:
days-before-issue-stale: 60
days-before-issue-close: 14
stale-issue-label: "stale"
stale-issue-message: "This issue is stale because it has been open for 30 days with no activity."
close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
days-before-pr-stale: -1
days-before-pr-close: -1
repo-token: ${{ secrets.GITHUB_TOKEN }}
120 changes: 35 additions & 85 deletions R/annovarToMaf.R
Original file line number Diff line number Diff line change
Expand Up @@ -117,24 +117,16 @@ annovarToMaf = function(annovar, Center = NULL, refBuild = 'hg19', tsbCol = NULL
ncRNA_splicing = "RNA"
)

ann_exonic = ann[Func.refGene %in% 'exonic']
ann_res = ann[!Func.refGene %in% 'exonic']
if(nrow(ann_exonic) ==0 & nrow(ann_res) == 0){
stop("No suitable exonic or intronic variants found!")
}

#Exonic variants
if(nrow(ann_exonic) > 0){
cat("-Processing Exonic variants\n")
#Choose first entry for mixed annotations (e.g; exonic;splicing)
ann[, Func.refGene := unlist(data.table::tstrsplit(x = as.character(Func.refGene), split = ";", keep = 1))]
ann[, ExonicFunc.refGene := unlist(data.table::tstrsplit(x = as.character(ExonicFunc.refGene), split = ";", keep = 1))]
ann$Variant_Classification = ifelse(is.na(ann$ExonicFunc.refGene), annovar_values[ann$Func.refGene], annovar_values[ann$ExonicFunc.refGene])

#Choose first entry for mixed annotations (e.g; exonic;splicing)
ann_exonic[, Func.refGene := data.table::tstrsplit(x = as.character(ann_exonic$Func.refGene), split = ";", keep = 1)]
cat("--Adding Variant_Classification\n")
ann_exonic[,Variant_Classification := annovar_values[ExonicFunc.refGene]]
if(nrow(ann[!is.na(AAChange.refGene)]) > 0){
cat("--Extracting tx, exon, txchange and aa-change\n")
ann_change = ann[!is.na(AAChange.refGene)]
aa_change = unlist(data.table::tstrsplit(x = as.character(ann_change$AAChange.refGene),split = ',', fixed = TRUE, keep = 1))

#Parse aa-change for exonic variants
cat("--Parsing aa-change\n")
aa_change = unlist(data.table::tstrsplit(x = as.character(ann_exonic$AAChange.refGene),split = ',', fixed = TRUE, keep = 1))
aa_tbl = lapply(aa_change, function(x){
x = unlist(strsplit(x = x, split = ":", fixed = TRUE))

Expand All @@ -155,80 +147,34 @@ annovarToMaf = function(annovar, Center = NULL, refBuild = 'hg19', tsbCol = NULL
data.table::data.table(tx, exon, txChange, aaChange)
})
aa_tbl = data.table::rbindlist(l = aa_tbl)

if(length(aa_change) != nrow(ann_exonic)){
stop("Something went wrong parsing aa-change")
if(length(aa_change) != nrow(ann_change)){
stop("Something went wrong while parsing aa-change")
}
ann_exonic = cbind(ann_exonic, aa_tbl)
}

#Non-exonic variants
if(nrow(ann_res) > 0){
cat("-Processing Non-exonic variants\n")
ann_res[, Func.refGene := data.table::tstrsplit(x = as.character(ann_res$Func.refGene), split = ";", keep = 1)]
cat("--Adding Variant_Classification\n")
ann_res[,Variant_Classification := annovar_values[Func.refGene]]
#Merge exonic and non-exonic variants
ann = data.table::rbindlist(l = list(ann_exonic, ann_res), use.names = TRUE, fill = TRUE)
}else{
ann = ann_exonic
ann_change = cbind(ann_change, aa_tbl)
ann = data.table::rbindlist(l = list(ann_change, ann[is.na(AAChange.refGene)]), use.names = TRUE, fill = TRUE)
}

#Add Variant-type annotations based on difference between ref and alt alleles
cat("-Adding Variant_Type\n")
ann[,ref_alt_diff := nchar(Ref) - nchar(Alt)]
ann[, Variant_Type := ifelse(
ref_alt_diff == 0 ,
yes = ifelse(test = Ref != "-" & Alt != "-", yes = "SNP",
no = ifelse(ref_alt_diff < 0 , yes = "INS", no = "DEL")), no = NA)
]

#Check for MNPs (they are neither INDELS nor SNPs)
ann$Variant_Type = ifelse(
test = ann$Variant_Type == "SNP",
yes = ifelse(
test = nchar(ann$Ref) > 1,
yes = "MNP",
no = "SNP"
),
no = ann$Variant_Type
)
ann$Variant_Type = ifelse(endsWith(x = ann$Variant_Classification, suffix = "_Del"), yes = "DEL", no = NA)
ann$Variant_Type = ifelse(endsWith(x = ann$Variant_Classification, suffix = "_Ins"), yes = "INS", no = ann$Variant_Type)
ann$Variant_Type = ifelse(ann$Variant_Classification %in% c("Missense_Mutation", "Silent") , yes = "SNP", no = ann$Variant_Type)

#Annotate MNPs as Unknown VC
ann_mnps = ann[Variant_Type %in% "MNP"]
if(nrow(ann_mnps) > 0){
ann = ann[!Variant_Type %in% "MNP"]
ann_mnps[,Variant_Classification := "Unknown"]
ann = rbind(ann, ann_mnps)
rm(ann_mnps)
}

#Frameshift_INDEL, Inframe_INDEL are annotated by annovar without INS or DEL status
#Parse this based on difference between ref and alt alleles
ann_indel = ann[Variant_Classification %in% c("Frameshift_INDEL", "Inframe_INDEL")]
if(nrow(ann_indel) > 0){
cat("-Fixing ambiguous INDEL annotations\n")
ann = ann[!Variant_Classification %in% c("Frameshift_INDEL", "Inframe_INDEL")]
vc_fixed = lapply(1:nrow(ann_indel), function(i) {
x = ann_indel[i, Variant_Classification]
if (x == "Frameshift_INDEL") {
if (ann_indel[i, ref_alt_diff] > 0) {
return("Frame_Shift_Del")
} else{
return("Frame_Shift_Ins")
}
} else if (x == "Inframe_INDEL") {
if (ann_indel[i, ref_alt_diff] > 0) {
return("In_Frame_Del")
} else{
return("In_Frame_Ins")
}
} else{
return(x)
}
})
ann_indel[,Variant_Classification := unlist(vc_fixed)]
ann = rbind(ann, ann_indel)
ann[,ref_alt_len := nchar(Ref) + nchar(Alt)]
ann[,ref_alt_diff := nchar(Ref) - nchar(Alt)]
ann$Variant_Type = ifelse(ann$ref_alt_diff < 0 , yes = "INS", no = ann$Variant_Type)
ann$Variant_Type = ifelse(ann$ref_alt_diff > 0 , yes = "DEL", no = ann$Variant_Type)

#For ambiguous variants such as DNPs, TNPs and ONPs
if(nrow(ann[is.na(Variant_Type)]) > 0){
ann[, ref_alt := paste(Ref, Alt, sep = ">")]
ann_na = ann[is.na(Variant_Type)]
ann_na$Variant_Type = ifelse(test = ann_na$ref_alt %in% paste("-", c("A", "T", "G", "C"), sep = ">"), yes = "INS",
no = ifelse(test = ann_na$ref_alt %in% paste(c("A", "T", "G", "C"), "-", sep = ">"), yes = "DEL",
no = ifelse(ann_na$ref_alt_len == 2, yes = "SNP", no = ifelse(test = ann_na$ref_alt_len == 4, yes = "DNP",
no = "ONP"))))
ann[, ref_alt := NULL]
ann = data.table::rbindlist(l = list(ann[!is.na(Variant_Type)], ann_na), use.names = TRUE, fill = TRUE)
}

#Annovar ensGene doesn't provide HGNC gene symbols as Hugo_Symbol. We will change them manually.
Expand All @@ -246,14 +192,18 @@ annovarToMaf = function(annovar, Center = NULL, refBuild = 'hg19', tsbCol = NULL
}
}
ann[, ref_alt_diff := NULL]
ann[, ref_alt_len := NULL]

#MAF requires Hugo_Symbols to be set to Unknown in case of IGR mutations
ann$Hugo_Symbol = ifelse(ann$Variant_Classification == "IGR", yes = "Unknown", no = ann$Hugo_Symbol)
ann$Hugo_Symbol = ifelse(is.na(ann$Hugo_Symbol), yes = "Unknown", no = ann$Hugo_Symbol)

#Re-roganize columns
colnames(ann)[match(c("Chr", "Start", "End", "Ref", "Alt"), colnames(ann))] = c("Chromosome", "Start_Position", "End_Position", "Reference_Allele", "Tumor_Seq_Allele2")
ord1 = colnames(x = ann)[colnames(x = ann) %in% c("Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Reference_Allele", "Tumor_Seq_Allele2", "Variant_Classification", "Variant_Type", "Tumor_Sample_Barcode", "tx", "exon", "txChange", "aaChange")]
ord2 = colnames(x = ann)[!colnames(x = ann) %in% c("Hugo_Symbol", "Chromosome", "Start_Position", "End_Position", "Reference_Allele", "Tumor_Seq_Allele2", "Variant_Classification", "Variant_Type", "Tumor_Sample_Barcode", "tx", "exon", "txChange", "aaChange")]
ann = ann[,c(ord1, ord2), with = FALSE]


if(!is.null(basename)){
data.table::fwrite(x = ann, file = paste(basename, 'maf', sep='.'), sep='\t')
}
Expand Down
12 changes: 6 additions & 6 deletions R/extractSignatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,17 @@ extractSignatures = function(mat, n = NULL, plotBestFitRes = FALSE, parallel = 4
colnames(w) = paste('Signature', 1:ncol(w),sep='_')

#Contribution
h = NMF::coef(conv.mat.nmf)
colnames(h) = colnames(mat) #correct colnames (seems to be mssing with low mutation load)
h_abs = NMF::coef(conv.mat.nmf)
colnames(h_abs) = colnames(mat) #correct colnames (seems to be mssing with low mutation load)
#For single signature, contribution will be 100% per sample
if(n == 1){
h = h/h
h = h_abs/h_abs
rownames(h) = paste('Signature', '1', sep = '_')
}else{
h = apply(h, 2, function(x) x/sum(x)) #Scale contributions (coefs)
rownames(h) = paste('Signature', 1:nrow(h),sep='_')
h = apply(h_abs, 2, function(x) x/sum(x)) #Scale contributions (coefs)
rownames(h_abs) = rownames(h) = paste('Signature', 1:nrow(h),sep='_')
}

message("-Finished in",data.table::timetaken(start_time))
return(list(signatures = w, contributions = h, nmfObj = conv.mat.nmf))
return(list(signatures = w, contributions = h, nmfObj = conv.mat.nmf, contributions_abs = h_abs))
}
18 changes: 12 additions & 6 deletions R/plotSignatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@
#'
#' @param nmfRes results from \code{\link{extractSignatures}}
#' @param contributions If TRUE plots contribution of signatures in each sample.
#' @param absolute Whether to plot absolute contributions. Default FALSE.
#' @param color colors for each Ti/Tv conversion class. Default NULL
#' @param patient_order User defined ordering of samples. Default NULL.
#' @param title_size size of title. Default 1.3
#' @param axis_lwd axis width. Default 2.
#' @param font_size font size. Default 1.2
#' @param font_size font size. Default 0.6
#' @param show_title If TRUE compares signatures to COSMIC signatures and prints them as title
#' @param sig_db Only applicable if show_title is TRUE. Can be \code{legacy} or \code{SBS}. Default \code{legacy}
#' @param show_barcodes Default FALSE
Expand All @@ -18,11 +19,16 @@
#' @seealso \code{\link{trinucleotideMatrix}} \code{\link{plotSignatures}}
#' @export
#'
plotSignatures = function(nmfRes = NULL, contributions = FALSE, color = NULL, patient_order = NULL,
font_size = 1.2, show_title = TRUE, sig_db = "legacy", axis_lwd = 2, title_size = 0.9, show_barcodes = FALSE, yaxisLim = 0.3, ...){
plotSignatures = function(nmfRes = NULL, contributions = FALSE, absolute = FALSE, color = NULL, patient_order = NULL,
font_size = 0.6, show_title = TRUE, sig_db = "legacy", axis_lwd = 2, title_size = 0.9, show_barcodes = FALSE, yaxisLim = 0.3, ...){

conv.mat.nmf.signatures = nmfRes$signatures
contrib = nmfRes$contributions
if(absolute){
contrib = nmfRes$contributions_abs
}else{
contrib = nmfRes$contributions
}

coSineMat = nmfRes$coSineSimMat

if(contributions){
Expand All @@ -45,7 +51,7 @@ plotSignatures = function(nmfRes = NULL, contributions = FALSE, color = NULL, pa
b = barplot(contrib, axes = FALSE, horiz = FALSE, col = cols, border = NA, names.arg = rep("", ncol(contrib)))
axis(side = 1, at = b, labels = colnames(contrib), lwd = 2, cex.axis = font_size,
las = 2, line = 0.2, hadj = 0.8, font = 1, tick = FALSE)
axis(side = 2, at = seq(0, 1, 0.25), lwd = 3, font = 1, las = 2, cex.axis = 0.9)
axis(side = 2, at = pretty(contrib), lwd = 1, font = 1, las = 2, cex.axis = 0.9)
mtext(text = "Signature exposures", side = 2, font = 1, cex = 1, line = 2.8)
plot.new()
par(mar = c(2, 3, 0, 0))
Expand All @@ -56,7 +62,7 @@ plotSignatures = function(nmfRes = NULL, contributions = FALSE, color = NULL, pa
lo = graphics::layout(mat = matrix(data = c(1, 2), nrow = 2), heights = c(6, 2))
par(mar = c(3, 4, 2, 1))
b = barplot(contrib, axes = FALSE, horiz = FALSE, col = cols, border = NA, names.arg = rep("", ncol(contrib)))
axis(side = 2, at = seq(0, 1, 0.25), lwd = 3, font = 1, las = 2, cex.axis = 0.9)
axis(side = 2, at = pretty(contrib), lwd = 1, font = 1, las = 2, cex.axis = 0.9)
mtext(text = "Signature exposure", side = 2, font = 1, cex = 1, line = 2.8)
plot.new()
par(mar = c(2, 3, 0, 0))
Expand Down
8 changes: 4 additions & 4 deletions R/validateMaf.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,18 +37,18 @@ validateMaf = function(maf, rdup = TRUE, isTCGA = isTCGA, chatty = TRUE){
if(chatty){
cat('--Found ', nrow(maf[Hugo_Symbol %in% ""]), ' variants with no Gene Symbols\n')
#print(maf[Hugo_Symbol %in% "", required.fields, with = FALSE])
cat("--Annotating them as 'UnknownGene' for convenience\n")
cat("--Annotating them as 'Unknown' for convenience\n")
}
maf$Hugo_Symbol = ifelse(test = maf$Hugo_Symbol == "", yes = 'UnknownGene', no = maf$Hugo_Symbol)
maf$Hugo_Symbol = ifelse(test = maf$Hugo_Symbol == "", yes = 'Unknown', no = maf$Hugo_Symbol)
}

if(nrow(maf[is.na(Hugo_Symbol)]) > 0){
if(chatty){
cat('--Found ', nrow(maf[is.na(Hugo_Symbol) > 0]), ' variants with no Gene Symbols\n')
#print(maf[is.na(Hugo_Symbol), required.fields, with =FALSE])
cat("--Annotating them as 'UnknownGene' for convenience\n")
cat("--Annotating them as 'Unknown' for convenience\n")
}
maf$Hugo_Symbol = ifelse(test = is.na(maf$Hugo_Symbol), yes = 'UnknownGene', no = maf$Hugo_Symbol)
maf$Hugo_Symbol = ifelse(test = is.na(maf$Hugo_Symbol), yes = 'Unknown', no = maf$Hugo_Symbol)
}

if(isTCGA){
Expand Down
7 changes: 6 additions & 1 deletion inst/NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,20 @@
(GitHub master branch)

## NEW FUNCTIONS
- `coGisticChromPlot` for plotting two GISTIC objects side-by-side. [954](https://github.com/PoisonAlien/maftools/pull/954)
- `coGisticChromPlot` for plotting two GISTIC objects side-by-side. PR by [biosunsci](https://github.com/biosunsci) [954](https://github.com/PoisonAlien/maftools/pull/954)
- `readGistic` can take gistic output directory as an input. PR by [biosunsci](https://github.com/biosunsci) [954](https://github.com/PoisonAlien/maftools/pull/954)

## BUG FIXES
- Bug fix in `oncoplot` for drawing borders. [958](https://github.com/PoisonAlien/maftools/issues/958)
- Bug fix in `plotSignatures` for hardcoded axis limits. [949](https://github.com/PoisonAlien/maftools/issues/949)
- Bug fix in `subsetMaf` while handling only CNV events. [908](https://github.com/PoisonAlien/maftools/issues/908)
- Error handling when no deep/shallow CNV events found. [899](https://github.com/PoisonAlien/maftools/issues/899)
- Bug fix in `oncoplot` for duplicated values in gene list. [889](https://github.com/PoisonAlien/maftools/issues/889)

## ENHANCEMENTS
- Improved `annovarToMaf` with better handling of indels and `Variant_Type`. Issue: [940](https://github.com/PoisonAlien/maftools/issues/940)
- Include absolute contribution of each signature in `extractSignatures` output. Issue: [939](https://github.com/PoisonAlien/maftools/issues/939)
- Added `DSEL` protein to the database. Issue: [933](https://github.com/PoisonAlien/maftools/issues/933)
- Added `showOnlyPathway` argument to `oncoplot`
- Added `pathdb` argument to `PlotOncogenicPathways`. Issue: [923](https://github.com/PoisonAlien/maftools/issues/923)
- Emit warnings when fishers test can not be performed during `somaticInteractions`. Issue: [921](https://github.com/PoisonAlien/maftools/issues/921)
Expand Down
7 changes: 5 additions & 2 deletions man/plotSignatures.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 2dce987

Please sign in to comment.