diff --git a/R/filterCopyNumber.R b/R/filterCopyNumber.R index cbcd6ec..d02501d 100644 --- a/R/filterCopyNumber.R +++ b/R/filterCopyNumber.R @@ -19,7 +19,7 @@ filterCopyNumber = function(seg, tsb.dat, tempCheck, tsb){ } #Remove copy number altered variants. - tsb.dat.cn.vars = tsb.dat[!CN >1.5 & CN < 2.5] + tsb.dat.cn.vars = tsb.dat[!(CN >1.5 & CN < 2.5)] if(nrow(tsb.dat.cn.vars) > 0){ message('Copy number altered variants:') tsb.dat.cn.vars$cluster = 'CN_altered' diff --git a/R/inferTumHetero.R b/R/inferTumHetero.R index 304ffaf..c9f13bb 100644 --- a/R/inferTumHetero.R +++ b/R/inferTumHetero.R @@ -75,10 +75,13 @@ inferHeterogeneity = function(maf, tsb = NULL, top = 5, vafCol = NULL, segFile = seg.tsbs = unique(seg.dat[,Sample]) - if(length(seg.tsbs[!seg.tsbs %in% tsb]) > 0){ + if(sum(!tsb %in% seg.tsbs) > 0){ message("CN data for following samples not found. Ignoring them ..") - print(seg.tsbs[!seg.tsbs %in% tsb]) - seg.tsbs = seg.tsbs[seg.tsbs %in% tsb] + print(tsb[!tsb %in% seg.tsbs]) + seg.tsbs = tsb[tsb %in% seg.tsbs] + } else { + ## This is used to keep data for selected samples + seg.tsbs = tsb } if(length(seg.tsbs) > 0){ @@ -120,29 +123,29 @@ inferHeterogeneity = function(maf, tsb = NULL, top = 5, vafCol = NULL, segFile = for(i in 1:length(tsb)){ - message(paste('Processing ', tsb[i],'..', sep='')) + message('Processing ', tsb[i], '..') tsb.dat = dat.tsb[Tumor_Sample_Barcode %in% tsb[i]] tsb.dat = tsb.dat[!is.na(tsb.dat$t_vaf),] + #nvm this. Variable for later use + tempCheck = 0 + + if(!is.null(segFile)){ + if(tsb[i] %in% seg.tsbs){ + seg = seg.dat[Sample %in% tsb[i]] + #Map copynumber and variants; filter variants on CN altered regions. + seg.res = filterCopyNumber(seg, tsb.dat, tempCheck, tsb[i]) + tsb.dat = seg.res[[1]] + tsb.dat.cn.vars = seg.res[[2]] + tempCheck = seg.res[[3]] + } + } + if(nrow(tsb.dat) < 3){ #Less than 3 variants might not be useful. message('Too few mutations for clustering. Skipping..') }else{ - #nvm this. Variable for later use - tempCheck = 0 - - if(!is.null(segFile)){ - if(tsb[i] %in% seg.tsbs){ - seg = seg.dat[Sample %in% tsb[i]] - #Map copynumber and variants; filter variants on CN altered regions. - seg.res = filterCopyNumber(seg, tsb.dat, tempCheck, tsb[i]) - tsb.dat = seg.res[[1]] - tsb.dat.cn.vars = seg.res[[2]] - tempCheck = seg.res[[3]] - } - } - #cluster if(dirichlet){ #Awesome blog post on non-finite mixture models @@ -172,9 +175,13 @@ inferHeterogeneity = function(maf, tsb = NULL, top = 5, vafCol = NULL, segFile = } } - #Caluclate cluster means - clust.dat.mean = clust.dat[,mean(t_vaf), by = .(Tumor_Sample_Barcode, cluster)] - colnames(clust.dat.mean)[ncol(clust.dat.mean)] = 'meanVaf' + if (is.null(clust.dat)) { + message("No result, this is basically caused by no copy number neutral variants,\n you may re-run this without copy number data.") + } else { + #Caluclate cluster means + clust.dat.mean = clust.dat[,mean(t_vaf), by = .(Tumor_Sample_Barcode, cluster)] + colnames(clust.dat.mean)[ncol(clust.dat.mean)] = 'meanVaf' - return(list(clusterData = clust.dat, clusterMeans = clust.dat.mean)) + return(list(clusterData = clust.dat, clusterMeans = clust.dat.mean)) + } } diff --git a/R/validateMaf.R b/R/validateMaf.R index 862deeb..2c45b99 100644 --- a/R/validateMaf.R +++ b/R/validateMaf.R @@ -85,6 +85,12 @@ validateMaf = function(maf, rdup = TRUE, isTCGA = isTCGA, chatty = TRUE){ } } + # Check type of variant position + if (any(!is.numeric(maf$Start_Position), !is.numeric(maf$End_Position))) { + maf$Start_Position = as.integer(maf$Start_Position) + maf$End_Position = as.integer(maf$End_Position) + } + # Set Factors maf$Tumor_Sample_Barcode = as.factor(maf$Tumor_Sample_Barcode)