diff --git a/.Rbuildignore b/.Rbuildignore index 705005d2..d5b3746e 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,12 +1,16 @@ +^Meta$ +^doc$ ^\.travis\.yml$ ^R/\.wip ^docker ^run_tests ^\. +^inferCNV\.wiki ^infercnv.Rdata ^inferCNV.Rproj ^Rstudio_helpers ^__simulations +^example ^example/full_precision ^example/example.html ^example/test_subdir diff --git a/.gitignore b/.gitignore index 8327614f..5644a7f2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +Meta +doc .idea .Rproj.user .Rhistory diff --git a/.travis.yml b/.travis.yml index 2775229a..a4af77ef 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,13 @@ language: r r: bioc-release +before_install: + - sudo apt-get update + - sudo apt-get install r-cran-rjags + +# r_build_args: --no-build-vignettes --no-manual --no-resave-data +r_check_args: --no-build-vignettes # --no-manual + +#script: +#- travis_wait R CMD build . +#- R CMD check --no-build-vignettes *tar.gz diff --git a/DESCRIPTION b/DESCRIPTION index 881744ff..dba04d97 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,27 +1,38 @@ Package: infercnv Type: Package Title: Infer Copy Number Variation from Single-Cell RNA-Seq Data -Version: 0.8.2 +Version: 0.99.0 Date: 2018-11-08 Authors@R: c( person("Timothy", "Tickle", email = "ttickle@broadinstitute.org", role = c("aut", "cre")), person("Itay", "Tirosh", email = "tirosh@broadinstitute.org", role = "aut"), person("Christophe", "Georgescu", email = "cgeorges@broadinstitute.org", role = "aut"), person("Maxwell", "Brown", email = "mbrown@broadinstitute.org", role = "aut"), person("Brian", "Haas", email = "bhaas@broadinstitute.org", role = "aut")) Author: Timothy Tickle [aut, cre], Itay Tirosh [aut], Christophe Georgescu [aut], Maxwell Brown [aut], Brian Haas [aut] Maintainer: Christophe Georgescu BugReports: https://github.com/broadinstitute/inferCNV/issues Description: Using single-cell RNA-Seq expression to visualize CNV in cells. 
-Depends: R(>= 3.2.1) +biocViews: Software, CopyNumberVariation, VariantDetection, StructuralVariation, GenomicVariation, Genetics, Transcriptomics, StatisticalMethod, Bayesian, HiddenMarkovModel, SingleCell +Depends: R(>= 3.5) License: BSD_3_clause + file LICENSE LazyData: TRUE VignetteBuilder: knitr Suggests: BiocStyle, knitr, rmarkdown, testthat -Enhances: NGCHM -RoxygenNote: 6.1.0 +RoxygenNote: 6.1.1 NeedsCompilation: no -Imports: RColorBrewer, gplots, futile.logger, stats, utils, methods, ape, Matrix, binhf, fastcluster, dplyr, coin +SystemRequirements: JAGS 4.x.y +Imports: graphics, grDevices, RColorBrewer, gplots, futile.logger, stats, utils, methods, ape, Matrix, fastcluster, dplyr, HiddenMarkov, ggplot2, edgeR, coin, caTools, reshape, rjags, fitdistrplus, future, foreach, doParallel, BiocGenerics, SummarizedExperiment, SingleCellExperiment, tidyr, parallel, coda, gridExtra, argparse +BugReports: https://github.com/broadinstitute/inferCNV/issues +URL: https://github.com/broadinstitute/inferCNV/wiki Collate: - 'NextGenHeatMap.R' + 'SplatterScrape.R' 'inferCNV.R' + 'inferCNV_BayesNet.R' + 'inferCNV_HMM.R' 'inferCNV_constants.R' 'inferCNV_heatmap.R' + 'inferCNV_hidden_spike.R' + 'inferCNV_i3HMM.R' 'inferCNV_mask_non_DE.R' + 'inferCNV_meanVarSim.R' 'inferCNV_ops.R' - 'inferCNV_spike.R' + 'inferCNV_simple_sim.R' + 'inferCNV_tumor_subclusters.R' + 'inferCNV_tumor_subclusters.random_smoothed_trees.R' + 'noise_reduction.R' diff --git a/NAMESPACE b/NAMESPACE index 18265d54..f657e075 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,49 +1,100 @@ # Generated by roxygen2: do not edit by hand +export(.i3HMM_get_sd_trend_by_num_cells_fit) export(CreateInfercnvObject) export(anscombe_transform) export(apply_max_threshold_bounds) +export(apply_median_filtering) +export(assign_HMM_states_to_proxy_expr_vals) export(center_cell_expr_across_chromosome) export(clear_noise) export(clear_noise_via_ref_mean_sd) -export(compute_normalization_factor) +export(determine_mean_delta_via_Z) 
+export(filterHighPNormals) +export(generate_cnv_region_reports) export(get_DE_genes_basic) export(get_average_bounds) +export(get_hspike_cnv_mean_sd_trend_by_num_cells_fit) +export(get_predicted_CNV_regions) export(get_reference_grouped_cell_indices) +export(get_spike_dists) +export(i3HMM_assign_HMM_states_to_proxy_expr_vals) +export(i3HMM_predict_CNV_via_HMM_on_indiv_cells) +export(i3HMM_predict_CNV_via_HMM_on_tumor_subclusters) +export(i3HMM_predict_CNV_via_HMM_on_whole_tumor_samples) +export(inferCNVBayesNet) export(invert_log2) export(invert_log2xplus1) export(log2xplus1) export(make_zero_NA) export(mask_non_DE_genes_basic) export(mean_center_gene_expr) -export(ngchm) export(normalize_counts_by_seq_depth) export(plot_cnv) +export(predict_CNV_via_HMM_on_indiv_cells) +export(predict_CNV_via_HMM_on_tumor_subclusters) +export(predict_CNV_via_HMM_on_whole_tumor_samples) export(remove_genes) export(remove_genes_at_ends_of_chromosomes) export(remove_outliers_norm) -export(remove_spike) export(require_above_min_cells_ref) export(require_above_min_mean_expr_cutoff) +export(returningInferCNV) export(run) -export(scale_cnv_by_spike) +export(scale_infercnv_expr) export(smooth_by_chromosome) -export(spike_in_variation_chrs) +export(smooth_by_chromosome_runmeans) export(split_references) export(subtract_ref_expr_from_obs) export(transform_to_reference_based_Zscores) +exportClasses(MCMC_inferCNV) exportClasses(infercnv) +exportMethods(MeanSD) +exportMethods(cellGene) +exportMethods(getGenesCells) +exportMethods(getProbabilities) +exportMethods(getStates) +exportMethods(initializeObject) +exportMethods(mcmcDiagnosticPlots) +exportMethods(nonParallel) +exportMethods(plotProbabilities) +exportMethods(postProbNormal) +exportMethods(removeCNV) +exportMethods(removeCells) +exportMethods(returningInferCNV) +exportMethods(runMCMC) +exportMethods(setBayesMaxPNormal) +exportMethods(withParallel) import(RColorBrewer) -import(coin) +import(argparse) +import(coda) +import(doParallel) 
+import(fitdistrplus) +import(foreach) import(futile.logger) +import(future) +import(ggplot2) +importFrom(BiocGenerics,counts) +importFrom(BiocGenerics,t) importFrom(Matrix,Matrix) importFrom(Matrix,colSums) importFrom(Matrix,rowMeans) +importFrom(SingleCellExperiment,SingleCellExperiment) +importFrom(SummarizedExperiment,"assays<-") +importFrom(SummarizedExperiment,"colData<-") +importFrom(SummarizedExperiment,"rowData<-") +importFrom(SummarizedExperiment,assays) +importFrom(SummarizedExperiment,colData) +importFrom(SummarizedExperiment,rowData) +importFrom(SummarizedExperiment,start) importFrom(ape,as.phylo) importFrom(ape,write.tree) -importFrom(binhf,ansc) +importFrom(caTools,runmean) +importFrom(coin,oneway_test) +importFrom(coin,pvalue) importFrom(dplyr,"%>%") importFrom(dplyr,count) +importFrom(edgeR,estimateDisp) importFrom(fastcluster,hclust) importFrom(gplots,bluered) importFrom(grDevices,col2rgb) @@ -67,9 +118,17 @@ importFrom(graphics,points) importFrom(graphics,rect) importFrom(graphics,text) importFrom(graphics,title) +importFrom(gridExtra,gtable_combine) +importFrom(gridExtra,marrangeGrob) +importFrom(gridExtra,tableGrob) +importFrom(gridExtra,ttheme_default) importFrom(methods,is) importFrom(methods,new) importFrom(methods,setClass) +importFrom(parallel,detectCores) +importFrom(reshape,melt) +importFrom(rjags,coda.samples) +importFrom(rjags,jags.model) importFrom(stats,as.dendrogram) importFrom(stats,as.dist) importFrom(stats,complete.cases) @@ -77,21 +136,39 @@ importFrom(stats,cor) importFrom(stats,cutree) importFrom(stats,density) importFrom(stats,dist) +importFrom(stats,dnorm) +importFrom(stats,ecdf) importFrom(stats,filter) +importFrom(stats,ks.test) +importFrom(stats,lm) importFrom(stats,median) +importFrom(stats,nls) importFrom(stats,order.dendrogram) importFrom(stats,p.adjust) +importFrom(stats,pnorm) importFrom(stats,predict) +importFrom(stats,qgamma) +importFrom(stats,qnorm) importFrom(stats,quantile) +importFrom(stats,rbinom) 
+importFrom(stats,rchisq) importFrom(stats,reorder) +importFrom(stats,rgamma) +importFrom(stats,rlnorm) +importFrom(stats,rnbinom) importFrom(stats,rnorm) +importFrom(stats,rpois) importFrom(stats,runif) importFrom(stats,sd) +importFrom(stats,shapiro.test) importFrom(stats,smooth.spline) importFrom(stats,t.test) +importFrom(stats,update) importFrom(stats,var) importFrom(stats,wilcox.test) +importFrom(tidyr,gather) importFrom(utils,flush.console) +importFrom(utils,read.csv) importFrom(utils,read.table) importFrom(utils,tail) importFrom(utils,write.table) diff --git a/R/NextGenHeatMap.R b/R/NextGenHeatMap.R deleted file mode 100644 index 0d9daf94..00000000 --- a/R/NextGenHeatMap.R +++ /dev/null @@ -1,274 +0,0 @@ -#!/usr/bin/env Rscript - -#' @title Create Next Generation Clustered Heat Map (NG-CHM) -#' @description Create highly interactive heat maps for single cell expression data using -#' Next Generation Clustered Heat Map (NG-CHM). NG-CHM was developed and -#' maintained by MD Anderson Department of Bioinformatics and Computational -#' Biology in collaboration with In Silico Solutions. -#' -#' @param infercnv_obj (S4) InferCNV S4 object holding expression data, gene location data, annotation information. -#' @param path_to_shaidyMapGen (string) Path to the java application ShaidyMapGen.jar -#' @param out_dir (string) Path to where the infercnv.ngchm output file should be saved to -#' @param title (string) Title that will be used for the heatmap -#' @param gene_symbol (string) Specify the label type that is given to the gene needed to create linkouts, default is NULL -#' @param x.center (integer) Center expression value for heatmap coloring. -#' @param x.range (integer) Values for minimum and maximum thresholds for heatmap coloring. -#' -#' @return -#' -#' Exports a NGCHM file named infercnv.ngchm and saves it to the output directory given to infercnv. 
-# Requires: -# NGCHM, ape, RcolorBrewer - -Create_NGCHM <- function(infercnv_obj, - path_to_shaidyMapGen, - out_dir, - title = NULL, - gene_symbol = NULL, - x.center = NA, - x.range = NA) { - - # ----------------------Check/create Pathways----------------------------------------------------------------------------------------------- - ## check out_dir - if (file.exists(out_dir)){ - file_path <- paste(out_dir, "infercnv.ngchm", sep = .Platform$file.sep) - }else{ - dir.create(file.path(out_dir)) - paste("Creating the following Directory: ", out_dir) - } - - #----------------------Initialize Next Generation Clustered Heat Map------------------------------------------------------------------ - # transpose the expression data so columns are the cell lines and rows are genes - plot_data <- t(infercnv_obj@expr.data) - # create color map for the heat map and save it as a new data layer - # cut_value is the value that represents cuts on the heatmap - cut_value <- -2147483648.0 - # if specific center value is not given, set to 1 - if (any(is.na(x.center))) { - x.center <- 1 - } - # if the range values are not given, will set appropriate values - if (! 
any(is.na(x.range))) { - ## if the range values are provided, use defined values - low_threshold <- x.range[1] - high_threshold <- x.range[2] - if (low_threshold > x.center | high_threshold < x.center | low_threshold >= high_threshold) { - x.center <- 0 - if (low_threshold > x.center | high_threshold < x.center | low_threshold >= high_threshold) { - stop(paste("Error, problem with relative values of x.range: ", x.range, ", and x.center: ", x.center)) - } - } - } else { - ## else, if not given, set the values - bounds <- get_average_bounds(infercnv_obj) - low_threshold <- as.numeric(bounds[1]) - high_threshold <- as.numeric(bounds[2]) - } - colMap <- NGCHM::chmNewColorMap(values = c(cut_value, low_threshold, x.center, high_threshold), - colors = c("grey45","darkblue","white","darkred"), - missing.color = "white", - type = "linear") - layer <- NGCHM::chmNewDataLayer("DATA", as.matrix(plot_data), colMap, summarizationMethod = "average") - # create the heat map object with the name "inferCNV" - if (is.null(title)){ - title = "inferCNV" - } - hm <- NGCHM::chmNew(title, layer) - # set the column (gene) order - hm@colOrder <- colnames(plot_data) - hm@colOrderMethod <- "User" - - # add linkouts to each gene (column) for more information - if (!is.null(gene_symbol)) { - hm <- NGCHM::chmAddAxisType(hm, 'col', gene_symbol) - } - - ## set variables - ref_index = infercnv_obj@reference_grouped_cell_indices - reference_idx = row.names(plot_data[unlist(ref_index),]) - ref_groups = names(ref_index) - - # ---------------------- Import Dendrogram & Order Rows ----------------------------------------------------------------------------------- - # IF Cluster By Group is set to TRUE: - # Get the order of the rows (cell lines) from the dendrogram created by infer_cnv - - # read the file containing the groupings created by infer_cnv - row_groups_path <- paste(out_dir, "infercnv.observation_groupings.txt", sep=.Platform$file.sep) - row_groups <- read.table(row_groups_path, header = TRUE, 
check.names = FALSE) # genes are the row names - obs_order <- rev(row.names(row_groups)) # Reveerse names to correct order - row_order <- c(as.vector(reference_idx), obs_order) # put the reference cells above the observed cells - ## check for correct dimensions of new row order - if (length(row_order) != nrow(plot_data)) { - stop("Error: After ordering the rows, row length does not match original dimensions of the data. - \n Difference in row length: Original ", nrow(plot_data), ", After ordering ", length(row_order)) - } - ## set the row order for the heatmap - hm@rowOrder <- row_order - hm@rowOrderMethod <- "User" - - # ----------------------Add Divisions Between References And Chromosomes ------------------------------------------------------------------ - # Column Separation: separation between the chromosomes - - ## get the correct order of the chromosomes - ordering <- unique(infercnv_obj@gene_order[['chr']]) - ## get gene locations in correct order, then find frequency of each chromosome - ## add locations to each gene - location_data <- infercnv_obj@gene_order - location_data$Gene <- row.names(infercnv_obj@gene_order) - gene_order = colnames(plot_data) - gene_locations_merge <- merge(data.frame(Gene = colnames(plot_data), stringsAsFactors = FALSE), location_data, by.x = "Gene") - gene_locations <- gene_locations_merge[match(gene_order,gene_locations_merge$Gene),] - # - # ## check if the number of genes has changed - if (nrow(gene_locations) != length(colnames(plot_data))){ - warning(paste0("Number of similar genes between expression data and locations:", nrow(gene_locations), - "\n Total number of genes in expression data: ", length(colnames(plot_data)), - "\n Check to make sure all the genes are in the location file and the gene names are the same between files.")) - } - ## put in order - ordered_locations <- table(gene_locations[['chr']])[ordering] - cumulative_len <- cumsum(ordered_locations) #cumulative sum, separation locations - sep_col_idx <- 
cumulative_len[-1 * length(cumulative_len)] # drop the last index because we do not want to add a break at the very end - sep_col_idx <- rep(1,length(sep_col_idx)) + sep_col_idx # add one to each index, want to be to the right of the last gene in that chr - ## colCutLocations: locations where the cuts will occur - ## colCutWidth: the width of the cuts - hm@colCutLocations <- as.integer(sep_col_idx) - hm@colCutWidth <- as.integer(30) - - # Row separation: separation between reference samples and observed samples - hm@rowCutLocations <- as.integer(length(reference_idx)+1) - # make the size of the separation proportional to the size of the heat map - ## use ncol because plot_data has cell ID's as the columns - row_sep <- ceiling(nrow(plot_data)*.01) - hm@rowCutWidth <- as.integer(row_sep) - - #----------------------Create Covariate Bar---------------------------------------------------------------------------------------------------------------------------------------- - # Returns the color palette for contigs. 
- get_group_color_palette <- function(){ - return(colorRampPalette(RColorBrewer::brewer.pal(12,"Set3"))) - } - # # check to make sure all cell lines are included - if (!(all(obs_order %in% row_order))) { - missing_ids <- row_groups[which(!(obs_order %in% row_order))] - error_message <- paste("Groupings of cell line ID's in observation_groupings.txt \n", - "do not match the ID's in the expression data.\n", - "Check the following cell line ID's: ", - paste(missing_ids, collapse = ",")) - } - #---------------------COLUMN Covariate bar---------------------------------------------------------------------------------------------------------------------- - # COLUMN Covariate bar - ## map the genes to their chromosome - ## gene_locations: created earlier, Genes and their locations - chr_labels <- as.vector(ordering) - ## get the chromosomes - chr <- as.character(gene_locations$chr) - ## get the gene ID's - names(chr) <- gene_locations$Gene - - chr_palette <- get_group_color_palette()(length(unique(location_data$chr))) - names(chr_palette) <- unique(location_data$chr) - - ## create color mapping - colMap_chr <- NGCHM::chmNewColorMap(values = as.vector(chr_labels), - colors = chr_palette, - missing.color = "white") - chr_cov <- NGCHM::chmNewCovariate(fullname = 'Chromosome', - values = chr, - value.properties = colMap_chr, - type = "discrete") - hm <- NGCHM::chmAddCovariateBar(hm, "column", chr_cov, - display = "visible", - thickness = as.integer(20)) - - #---------------------ROW Covariate bar---------------------------------------------------------------------------------------------------------------------- - - # create covariate bar from dendrogram groups - ## row_groups is taken from the dendrogram created by inferCNV - ## create better column names - colnames(row_groups) <- c("Dendrogram.Group", "Dendrogram.Color", "Annotation.Group", "Annotation.Color") - dendrogram_col <- as.character(unlist(row_groups["Dendrogram.Color"]))# group colors - dendrogram_group <- 
as.character(unlist(row_groups["Dendrogram.Group"]))# group number - dendrogram_unique_group <- unique(dendrogram_group) - cells <- row.names(row_groups) # cell line ID's - names(dendrogram_col) <- cells - names(dendrogram_group) <- cells - dendrogram_palette <- get_group_color_palette()(length(unique(dendrogram_col))) - ## create color mapping - colMap_dendrogram <- NGCHM::chmNewColorMap(values = as.vector(dendrogram_unique_group), - colors = dendrogram_palette, - missing.color = "white") - dendrogram_cov <- NGCHM::chmNewCovariate(fullname = 'Dendrogram', - values = dendrogram_group, - value.properties = colMap_dendrogram, - type = "discrete") - hm <- NGCHM::chmAddCovariateBar(hm, "row", dendrogram_cov, - display = "visible", - thickness = as.integer(20)) - - # Covariate to identify Reference and Observed data - annotation_col <- as.character(unlist(row_groups["Annotation.Color"])) # group colors - annotation_group <- as.character(unlist(row_groups["Annotation.Group"]))# group number - names(annotation_group) <- cells - names(annotation_col) <- cells - annotation_unique_group <- unique(annotation_group) - - len <-lengths(ref_index) - ref_bar_labels <- unlist(sapply(1:length(len), function(x){ rep(ref_groups[x],len[x]) })) - names(ref_bar_labels) <- reference_idx - - # if you want the exact coloring as the original inferCNV plots - #annotation_palette <- c(get_group_color_palette()(length(ref_index)), get_group_color_palette()(length(annotation_unique_group))) - - # combine reference and observed labels - annotation_group <- c(ref_bar_labels,annotation_group) - - # change the observed group names in bar to group namnes - observed_data <- infercnv_obj@observation_grouped_cell_indices - lapply(1:length(observed_data), function(x) { - tmp <- names(observed_data[x]) - annotation_group <<- replace(annotation_group, observed_data[[x]], tmp) } ) - unique_group <- unique(annotation_group) - annotation_palette <- get_group_color_palette()(length(unique_group)) - - # check 
if all reference cells are included - if (!(all(reference_idx %in% names(annotation_group)))){ - missing_refs <- reference_idx[which(!(reference_idx %in% names(annotation_group)))] - error_message <- paste("Error: Not all references are accounted for.", - "Make sure the reference names match the names in the data.\n", - "Check the following reference cell lines: ", - paste(missing_refs, collapse = ",")) - stop(error_message) - } - # check if all observed cells are included - observed_idx <- row.names(plot_data[unlist(infercnv_obj@observation_grouped_cell_indices),]) - if (!(all(observed_idx %in% names(annotation_group)))){ - missing_obs <- reference_idx[which(!(observed_idx %in% names(annotation_group)))] - error_message <- paste("Error: Not all observed cell lines are accounted for.", - "Make sure the reference names match the names in the data.\n", - "Check the following reference cell lines: ", - paste(missing_obs, collapse = ",")) - stop(error_message) - } - - ## create color mapping - colMap_annotation <- NGCHM::chmNewColorMap(values = as.vector(unique_group), - colors = annotation_palette, - missing.color = "white") - annotation_cov <- NGCHM::chmNewCovariate(fullname = 'Annotation', - values = annotation_group, - value.properties = colMap_annotation, - type = "discrete") - hm <- NGCHM::chmAddCovariateBar(hm, "row", annotation_cov, - display = "visible", - thickness = as.integer(20)) - - #---------------------------------------Export the heat map----------------------------------------------------------------------------------------------------------------------- - ## adjust the size of the heat map - #hm@width <- as.integer(500) - #hm@height <- as.integer(500) - ## adjust label display size - #hm@rowDisplayLength <- as.integer(10) - futile.logger::flog.info(paste("Saving new NGCHM object")) - NGCHM::chmExportToFile(hm, file_path, overwrite = TRUE, shaidyMapGen = path_to_shaidyMapGen) - } - diff --git a/R/SplatterScrape.R b/R/SplatterScrape.R new file mode 
100644 index 00000000..af5c1d2f --- /dev/null +++ b/R/SplatterScrape.R @@ -0,0 +1,495 @@ +################################################################################################################ +## Code here is primarily scraped from the Splatter package, leveraging just the pieces needed and further +## customized to our needs. +## +## Be sure to explore the original Splatter code as the source for these functions +## https://github.com/Oshlack/splatter +## and paper: +## Zappia L, Phipson B, Oshlack A. Splatter: simulation of single-cell RNA +## sequencing data. Genome Biology (2017). +## +## All code was 'scraped', 'lifted', whatever you want to call it, after discussions with the author Luke Zappia of the Splatter package +## as this form was easiest for integration of splatter sim methods customized to our needs. +## All attribution for single cell simulation methods is given to Zappia et al. and we're hugely thankful for being able to utilize it here. +################################################################################################################# + + +.estimateSingleCellParamsSplatterScrape <- function(counts, + include.dropout=FALSE, + use.spline.dropout.fit=FALSE # logistic is default. 
+ ) { + + # scraped from splatter + params = list() + + params[['include.dropout']] <- include.dropout + params[['use.spline.dropout.fit']] <- use.spline.dropout.fit + + ## Normalise for library size and remove all zero genes + lib.sizes <- colSums(counts) + lib.med <- median(lib.sizes) + norm.counts <- t(t(counts) / lib.sizes * lib.med) + norm.counts <- norm.counts[rowSums(norm.counts > 0) > 1, ] + + params <- .splatEstMean(norm.counts, params) + + params <- .splatEstLib(counts, params) + + params <- .splatEstOutlier(norm.counts, params) + + params <- .splatEstBCV(counts, params) + + params <- .splatEstDropout(norm.counts, params) + + params[['nGenes']] <- nrow(counts) + params[['nCells']] <- ncol(counts) + + print(params) + + return(params) +} + + +.splatEstMean <- function(norm.counts, params) { + + # library(fitdistrplus) + + means <- rowMeans(norm.counts) + means <- means[means != 0] + + means <- .winsorize(means, q = 0.1) + + fit <- fitdistrplus::fitdist(means, "gamma", method = "mge", + gof = "CvM") + if (fit$convergence > 0) { + warning("Fitting means using the Goodness of Fit method failed, ", + "using the Method of Moments instead") + fit <- fitdistrplus::fitdist(means, "gamma", method = "mme") + } + + params[['mean.shape']] <- unname(fit$estimate["shape"]) + params[['mean.rate']] <- unname(fit$estimate["rate"]) + + return(params) +} + +.winsorize <- function(x, q) { + + lohi <- stats::quantile(x, c(q, 1 - q), na.rm = TRUE) + + if (diff(lohi) < 0) { lohi <- rev(lohi) } + + x[!is.na(x) & x < lohi[1]] <- lohi[1] + x[!is.na(x) & x > lohi[2]] <- lohi[2] + + return(x) +} + + + +.splatEstLib <- function(counts, params) { + + lib.sizes <- colSums(counts) + + if (length(lib.sizes) > 5000) { + message("NOTE: More than 5000 cells provided. 
", + "5000 sampled library sizes will be used to test normality.") + lib.sizes.sampled <- sample(lib.sizes, 5000, replace = FALSE) + } else { + lib.sizes.sampled <- lib.sizes + } + + norm.test <- shapiro.test(lib.sizes.sampled) + lib.norm <- norm.test$p.value > 0.2 + + if (lib.norm) { + fit <- fitdistrplus::fitdist(lib.sizes, "norm") + lib.loc <- unname(fit$estimate["mean"]) + lib.scale <- unname(fit$estimate["sd"]) + message("NOTE: Library sizes have been found to be normally ", + "distributed instead of log-normal. You may want to check ", + "this is correct.") + } else { + fit <- fitdistrplus::fitdist(lib.sizes, "lnorm") + lib.loc <- unname(fit$estimate["meanlog"]) + lib.scale <- unname(fit$estimate["sdlog"]) + } + + params[['lib.loc']] <- lib.loc + params[['lib.scale']] <- lib.scale + params[['lib.norm']] <- lib.norm + + return(params) +} + + +.splatEstOutlier <- function(norm.counts, params) { + + means <- rowMeans(norm.counts) + lmeans <- log(means) + + med <- median(lmeans) + mad <- mad(lmeans) + + bound <- med + 2 * mad + + outs <- which(lmeans > bound) + + prob <- length(outs) / nrow(norm.counts) + + params[['out.prob']] <- prob + + if (length(outs) > 1) { + facs <- means[outs] / median(means) + fit <- fitdistrplus::fitdist(facs, "lnorm") + + params[['out.facLoc']] <- unname(fit$estimate["meanlog"]) + params[['out.facScale']] <- unname(fit$estimate["sdlog"]) + } + + return(params) +} + + +.splatEstBCV <- function(counts, params) { + + # Add dummy design matrix to avoid print statement + design <- matrix(1, ncol(counts), 1) + disps <- edgeR::estimateDisp(counts, design = design) + + + ## linear adjustment to bcv is based on somulations as per splatter code documentation. 
+ params[['bcv.common']] <- 0.1 + 0.25 * disps$common.dispersion + params[['bcv.df']] <- disps$prior.df + + return(params) +} + + +.splatEstDropout <- function(norm.counts, params) { + + means <- rowMeans(norm.counts) + + x <- log(means) + + obs.zeros <- rowSums(norm.counts == 0) + + y <- obs.zeros / ncol(norm.counts) + + df <- data.frame(x, y) + + colnames(df) <- c('log_means', 'pct_zeros') + #write.table(df, file="dropout.dat", quote=FALSE, sep="\t") + #plot(df$log_means, df$pct_zeros) + + x_approx_mid <- median(x[which(y>0.2 & y < 0.8)]) # bhaas-added to avoid error: Error in nls(y ~ .logistic(x, x0 = x0, k = k), data = df, start = list(x0 = 0, : singular gradient + + fit <- nls(y ~ .logistic(x, x0 = x0, k = k), data = df, + start = list(x0 = x_approx_mid, k = -1)) + + mid <- summary(fit)$coefficients["x0", "Estimate"] + shape <- summary(fit)$coefficients["k", "Estimate"] + + #points(x, predict(fit, newdata=x), col='green') + + params[['dropout.mid']] <- mid + params[['dropout.shape']] <- shape + + + ## also try fitting a spline + spline.fit <- smooth.spline(x,y) + params[['dropout.spline.fit']] <- spline.fit + spline.pts = predict(spline.fit, newdata=x) + #points(spline.pts$x, spline.pts$y, col='magenta') + #legend('topright', c('logistic', 'spline'), col=c('green', 'magenta'), pch=1) + + + return(params) +} + +.logistic <- function(x, x0, k) { + 1 / (1 + exp(-k * (x - x0))) +} + + +##################################### +### End of Splat Estimation routines +##################################### +## Beginning of Splat Simulation routines +######################################### + +.simulateSingleCellCountsMatrixSplatterScrape <- function(params, + use.genes.means=NULL + ) { + + if ( (! 
is.null(use.genes.means)) && length(use.genes.means) != params[['nGenes']]) { + stop("Error, use.genes.means provided but not matching the params nGenes count") + } + + # library(SingleCellExperiment) + + ## Get the parameters we are going to use + nCells <- params[["nCells"]] + nGenes <- params[["nGenes"]] + + # Set up name vectors + cell.names <- paste0("Cell", seq_len(nCells)) + gene.names <- paste0("Gene", seq_len(nGenes)) + + ## Create SingleCellExperiment to store simulation + cells <- data.frame(Cell = cell.names) + rownames(cells) <- cell.names + features <- data.frame(Gene = gene.names) + rownames(features) <- gene.names + sim <- SingleCellExperiment(rowData = features, + colData = cells, + metadata = list(Params = params)) + + message("Simulating library sizes...") + sim <- .splatSimLibSizes(sim, params) + + message("Simulating gene means...") + sim <- .splatSimGeneMeans(sim, params, use.genes.means) + + sim <- .splatSimBatchCellMeans(sim, params) + + sim <- .splatSimSingleCellMeans(sim, params) + + message("Simulating BCV...") + sim <- .splatSimBCVMeans(sim, params) + + message("Simulating counts...") + sim <- .splatSimTrueCounts(sim, params) + + message("Simulating dropout (if needed)...") + sim <- .splatSimDropout(sim, params) + + return(sim) +} + +.splatSimLibSizes <- function(sim, params) { + + nCells <- params[["nCells"]] + lib.loc <- params[["lib.loc"]] + lib.scale <- params[["lib.scale"]] + lib.norm <- params[["lib.norm"]] + + if (lib.norm) { + exp.lib.sizes <- rnorm(nCells, lib.loc, lib.scale) + min.lib <- min(exp.lib.sizes[exp.lib.sizes > 0]) + exp.lib.sizes[exp.lib.sizes < 0] <- min.lib / 2 + } else { + exp.lib.sizes <- rlnorm(nCells, lib.loc, lib.scale) + } + + colData(sim)$ExpLibSize <- exp.lib.sizes + + return(sim) +} + + +.splatSimGeneMeans <- function(sim, params, use.genes.means) { + + nGenes <- params[["nGenes"]] + mean.shape <- params[["mean.shape"]] + mean.rate <- params[["mean.rate"]] + out.prob <- params[["out.prob"]] + out.facLoc <- 
params[["out.facLoc"]] + out.facScale <- params[["out.facScale"]] + + if (! is.null(use.genes.means)) { + base.means.gene <- use.genes.means + } else { + ## Simulate base gene means + base.means.gene <- rgamma(nGenes, shape = mean.shape, rate = mean.rate) + } + ## Add expression outliers + outlier.facs <- .getLNormFactors(nGenes, out.prob, 0, out.facLoc, + out.facScale) + median.means.gene <- median(base.means.gene) + outlier.means <- median.means.gene * outlier.facs + is.outlier <- outlier.facs != 1 + means.gene <- base.means.gene + means.gene[is.outlier] <- outlier.means[is.outlier] + + rowData(sim)$BaseGeneMean <- base.means.gene + rowData(sim)$OutlierFactor <- outlier.facs + rowData(sim)$GeneMean <- means.gene + + return(sim) +} + +.getLNormFactors <- function(n.facs, sel.prob, neg.prob, fac.loc, fac.scale) { + + is.selected <- as.logical(rbinom(n.facs, 1, sel.prob)) + n.selected <- sum(is.selected) + dir.selected <- (-1) ^ rbinom(n.selected, 1, neg.prob) + facs.selected <- rlnorm(n.selected, fac.loc, fac.scale) + # Reverse directions for factors that are less than one + dir.selected[facs.selected < 1] <- -1 * dir.selected[facs.selected < 1] + factors <- rep(1, n.facs) + factors[is.selected] <- facs.selected ^ dir.selected + + return(factors) +} + +.splatSimBatchCellMeans <- function(sim, params) { + + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene + gene.means <- rowData(sim)$GeneMean + + nCells <- params[["nCells"]] + nGenes <- params[["nGenes"]] + + batch.facs.cell <- matrix(1, ncol = nCells, nrow = nGenes) + + batch.means.cell <- batch.facs.cell * gene.means + + colnames(batch.means.cell) <- cell.names + rownames(batch.means.cell) <- gene.names + assays(sim)$BatchCellMeans <- batch.means.cell + + return(sim) +} + + + + +.splatSimSingleCellMeans <- function(sim, params) { + + nCells <- params[["nCells"]] + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene + exp.lib.sizes <- colData(sim)$ExpLibSize + batch.means.cell <- 
assays(sim)$BatchCellMeans + + cell.means.gene <- batch.means.cell + cell.props.gene <- t(t(cell.means.gene) / colSums(cell.means.gene)) + base.means.cell <- t(t(cell.props.gene) * exp.lib.sizes) + + colnames(base.means.cell) <- cell.names + rownames(base.means.cell) <- gene.names + assays(sim)$BaseCellMeans <- base.means.cell + + assays(sim)$CellMeans <- base.means.cell # default, updated under .splatSimBCVMeans() + + return(sim) +} + + +.splatSimBCVMeans <- function(sim, params) { + + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene + nGenes <- params[["nGenes"]] + nCells <- params[["nCells"]] + bcv.common <- params[["bcv.common"]] + bcv.df <- params[["bcv.df"]] + base.means.cell <- assays(sim)$BaseCellMeans + + if (is.finite(bcv.df)) { + bcv <- (bcv.common + (1 / sqrt(base.means.cell))) * + sqrt(bcv.df / rchisq(nGenes, df = bcv.df)) + } else { + warning("'bcv.df' is infinite. This parameter will be ignored.") + bcv <- (bcv.common + (1 / sqrt(base.means.cell))) + } + + means.cell <- matrix(rgamma(nGenes * nCells, shape = 1 / (bcv ^ 2), + scale = base.means.cell * (bcv ^ 2)), + nrow = nGenes, ncol = nCells) + + colnames(means.cell) <- cell.names + rownames(means.cell) <- gene.names + + assays(sim)$BCV <- bcv + assays(sim)$CellMeans <- means.cell + + return(sim) + +} + +.splatSimTrueCounts <- function(sim, params) { + + cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene + nGenes <- params[["nGenes"]] + nCells <- params[["nCells"]] + cell.means <- assays(sim)$CellMeans + + true.counts <- matrix(rpois(nGenes * nCells, lambda = cell.means), + nrow = nGenes, ncol = nCells) + + colnames(true.counts) <- cell.names + rownames(true.counts) <- gene.names + + assays(sim)$TrueCounts <- true.counts + + return(sim) +} + +.splatSimDropout <- function(sim, params) { + + include.dropout <- params[["include.dropout"]] + true.counts <- assays(sim)$TrueCounts + dropout.mid <- params[["dropout.mid"]] + dropout.shape <- params[["dropout.shape"]] + 
cell.names <- colData(sim)$Cell + gene.names <- rowData(sim)$Gene + nCells <- params[["nCells"]] + nGenes <- params[["nGenes"]] + nBatches <- params[["nBatches"]] + nGroups <- params[["nGroups"]] + cell.means <- assays(sim)$CellMeans + dropout.spline.fit <- params[['dropout.spline.fit']] + + if (include.dropout) { + + if ( params[['use.spline.dropout.fit']] ) { + ## Generate probabilites based on expression + drop.prob <- sapply(seq_len(nCells), function(idx) { + eta <- log(cell.means[, idx]) + pvals <- predict(dropout.spline.fit, eta)$y + pvals[is.na(pvals)] <- 0 + pvals[pvals<0] <- 0 + pvals[pvals>1] <- 1 + return(pvals) + }) + + + } else { + # using logistic + dropout.mid <- rep(dropout.mid, nCells) + dropout.shape <- rep(dropout.shape, nCells) + + ## Generate probabilites based on expression + drop.prob <- sapply(seq_len(nCells), function(idx) { + eta <- log(cell.means[, idx]) + return(.logistic(eta, x0 = dropout.mid[idx], k = dropout.shape[idx])) + }) + } + + print(drop.prob) + + # Decide which counts to keep + keep <- matrix(rbinom(nCells * nGenes, 1, 1 - drop.prob), + nrow = nGenes, ncol = nCells) + + counts <- true.counts * keep + + colnames(drop.prob) <- cell.names + rownames(drop.prob) <- gene.names + colnames(keep) <- cell.names + rownames(keep) <- gene.names + + assays(sim)$DropProb <- drop.prob + assays(sim)$Dropout <- !keep + } else { + counts <- true.counts + } + + BiocGenerics::counts(sim) <- counts + + return(sim) +} diff --git a/R/inferCNV.R b/R/inferCNV.R index 0ab26e7b..80de4b4e 100755 --- a/R/inferCNV.R +++ b/R/inferCNV.R @@ -24,7 +24,11 @@ #' @slot reference_grouped_cell_indices mapping [['group_name']] to c(cell column indices) for reference (normal) cells #' #' @slot observation_grouped_cell_indices mapping [['group_name']] to c(cell column indices) for observation (tumor) cells +#' +#' @slot tumor_subclusters stores subclustering of tumors if requested #' +#' @slot .hspike a hidden infercnv object populated with simulated spiked-in data +#' 
#' @export #' @@ -35,7 +39,9 @@ infercnv <- methods::setClass( count.data = "ANY", gene_order= "data.frame", reference_grouped_cell_indices = "list", - observation_grouped_cell_indices = "list") ) + observation_grouped_cell_indices = "list", + tumor_subclusters = "ANY", + .hspike = "ANY") ) @@ -54,6 +60,12 @@ infercnv <- methods::setClass( #' #' @param delim delimiter used in the input files #' +#' @param max_cells_per_group maximun number of cells to use per group. Default=NULL, using all cells defined in the annotations_file. This option is useful for randomly subsetting the existing data for a quicker preview run, such as using 50 cells per group instead of hundreds. +#' +#' @param min_max_counts_per_cell minimum and maximum counts allowed per cell. Any cells outside this range will be removed from the counts matrix. default=NULL and uses all cells. If used, should be set as c(min_counts, max_counts) +#' +#' @param chr_exclude list of chromosomes in the reference genome annotations that should be excluded from analysis. Default = c('chrX', 'chrY', 'chrM') +#' #' @description Creation of an infercnv object. 
This requires the following inputs: #' A more detailed description of each input is provided below: #' @@ -104,32 +116,53 @@ infercnv <- methods::setClass( #' @export #' -CreateInfercnvObject <- function(raw_counts_matrix, gene_order_file, annotations_file, ref_group_names, delim="\t") { +CreateInfercnvObject <- function(raw_counts_matrix, + gene_order_file, + annotations_file, + ref_group_names, + delim="\t", + max_cells_per_group=NULL, + min_max_counts_per_cell=NULL, # can be c(low,high) for colsums + chr_exclude=c('chrX', 'chrY', 'chrM') ) { + + ## input expression data + if (Reduce("|", is(raw_counts_matrix) == "character")) { + flog.info(sprintf("Parsing matrix: %s", raw_counts_matrix)) - # input expression data - if (class(raw_counts_matrix) == "character") { - raw.data <- read.table(raw_counts_matrix, sep=delim, header=TRUE, row.names=1, check.names=FALSE) + if (substr(raw_counts_matrix, nchar(raw_counts_matrix)-2, nchar(raw_counts_matrix)) == ".gz") { + raw.data <- read.table(connection <- gzfile(raw_counts_matrix, 'rt'), sep=delim, header=TRUE, row.names=1, check.names=FALSE) + close(connection) + } + else { + raw.data <- read.table(raw_counts_matrix, sep=delim, header=TRUE, row.names=1, check.names=FALSE) + } + raw.data = as.matrix(raw.data) - } else if (class(raw_counts_matrix) %in% c("dgCMatrix", "matrix", "data.frame")) { + } else if (Reduce("|", is(raw_counts_matrix) %in% c("dgCMatrix", "matrix", "data.frame"))) { # use as is: raw.data <- raw_counts_matrix } else { stop("CreateInfercnvObject:: Error, raw_counts_matrix isn't recognized as a matrix, data.frame, or filename") } - - # get gene order info + + ## get gene order info + flog.info(sprintf("Parsing gene order file: %s", gene_order_file)) gene_order <- read.table(gene_order_file, header=FALSE, row.names=1, sep="\t") names(gene_order) <- c(C_CHR, C_START, C_STOP) - - # read annotations file - input_classifications <- read.table(annotations_file, header=FALSE, row.names=1, sep=delim, 
stringsAsFactors=FALSE) - - # just in case the first line is a default header, remove it: + if (! is.null(chr_exclude)) { + gene_order = gene_order[-which(gene_order$chr %in% chr_exclude),] + } + + ## read annotations file + flog.info(sprintf("Parsing cell annotations file: %s", annotations_file)) + input_classifications <- read.table(annotations_file, header=FALSE, row.names=1, sep=delim, stringsAsFactors=FALSE, colClasses = 'character') + + ## just in case the first line is a default header, remove it: if (rownames(input_classifications)[1] == "V1") { - input_classifications = input_classifications[-1, , drop=F] + input_classifications = input_classifications[-1, , drop=FALSE] } - # make sure all reference samples are accounted for: + ## make sure all reference samples are accounted for: if (! all( rownames(input_classifications) %in% colnames(raw.data)) ) { missing_cells <- rownames(input_classifications)[ ! ( rownames(input_classifications) %in% colnames(raw.data) ) ] @@ -141,29 +174,74 @@ CreateInfercnvObject <- function(raw_counts_matrix, gene_order_file, annotations stop(error_message) } - # restrict expression data to the annotated cells. + ## Determine if we need to do filtering on counts per cell + if (! 
is.null(min_max_counts_per_cell)) { + min_counts_per_cell = min_max_counts_per_cell[1] + max_counts_per_cell = min_max_counts_per_cell[2] + + cs = colSums(raw.data) + + cells.keep <- which(cs >= min_counts_per_cell & cs <= max_counts_per_cell) + + n_orig_cells <- ncol(raw.data) + n_to_remove <- n_orig_cells - length(cells.keep) + + flog.info(sprintf("-filtering out cells < %g or > %g, removing %g %% of cells", + min_counts_per_cell, + max_counts_per_cell, + n_to_remove/n_orig_cells * 100) ) + + raw.data <- raw.data[, cells.keep] + + input_classifications <- input_classifications[ rownames(input_classifications) %in% colnames(raw.data), , drop=FALSE] + + orig_ref_group_names = ref_group_names + ref_group_names <- ref_group_names[ ref_group_names %in% unique(input_classifications[,1]) ] + if (! all.equal(ref_group_names, orig_ref_group_names)) { + flog.warn(sprintf("-warning, at least one reference group has been removed due to cells lacking: %s", + orig_ref_group_names[! orig_ref_group_names %in% ref_group_names ] )) + } + } + + + if (! is.null(max_cells_per_group)) { + ## trim down where needed. + grps = split(input_classifications, input_classifications[,1]) + newdf = NULL + for (grp in names(grps)) { + df = grps[[grp]] + if (dim(df)[1] > max_cells_per_group) { + flog.info(sprintf("-reducing number of cells for grp %s from %g to %g", + grp, dim(df)[1], max_cells_per_group)) + grps[[grp]] = df[sample(1:dim(df)[1], max_cells_per_group),,drop=FALSE] + } + } + input_classifications = data.frame(Reduce(rbind, grps)) + } + + ## restrict expression data to the annotated cells. 
raw.data <- raw.data[,colnames(raw.data) %in% rownames(input_classifications)] - # reorder cell classifications according to expression matrix column names + ## reorder cell classifications according to expression matrix column names input_classifications <- input_classifications[order(match(row.names(input_classifications), colnames(raw.data))), , drop=FALSE] - # get indices for reference cells + ## get indices for reference cells ref_group_cell_indices = list() for (name_group in ref_group_names) { cell_indices = which(input_classifications[,1] == name_group) + if (length(cell_indices) == 0 ) { stop(sprintf("Error, not identifying cells with classification %s", name_group)) } - cell_names = rownames(input_classifications)[cell_indices] ref_group_cell_indices[[ name_group ]] <- cell_indices } - # rest of the cells are the 'observed' set. + ## rest of the cells are the 'observed' set. all_group_names <- unique(input_classifications[,1]) obs_group_names <- setdiff(all_group_names, ref_group_names) - # extract the genes indicated in the gene ordering file: + ## extract the genes indicated in the gene ordering file: order_ret <- .order_reduce(data=raw.data, genomic_position=gene_order) num_genes_removed = dim(raw.data)[1] - dim(order_ret$exp)[1] @@ -186,25 +264,23 @@ CreateInfercnvObject <- function(raw_counts_matrix, gene_order_file, annotations stop(error_message) } - # define groupings according to the observation annotation names + ## define groupings according to the observation annotation names - obs_group_cell_indices = list() for (name_group in obs_group_names) { cell_indices = which(input_classifications[,1] == name_group) - cell_names = rownames(input_classifications)[cell_indices] obs_group_cell_indices[[ name_group ]] <- cell_indices } - - + object <- new( Class = "infercnv", expr.data = raw.data, count.data = raw.data, gene_order = input_gene_order, reference_grouped_cell_indices = ref_group_cell_indices, - observation_grouped_cell_indices = 
obs_group_cell_indices) - + observation_grouped_cell_indices = obs_group_cell_indices, + tumor_subclusters = NULL, + .hspike = NULL) validate_infercnv_obj(object) @@ -318,11 +394,11 @@ CreateInfercnvObject <- function(raw_counts_matrix, gene_order_file, annotations remove_genes <- function(infercnv_obj, gene_indices_to_remove) { - infercnv_obj@expr.data <- infercnv_obj@expr.data[ -1 * gene_indices_to_remove, , drop=F] + infercnv_obj@expr.data <- infercnv_obj@expr.data[ -1 * gene_indices_to_remove, , drop=FALSE] - infercnv_obj@count.data <- infercnv_obj@count.data[ -1 * gene_indices_to_remove, , drop=F] + infercnv_obj@count.data <- infercnv_obj@count.data[ -1 * gene_indices_to_remove, , drop=FALSE] - infercnv_obj@gene_order <- infercnv_obj@gene_order[ -1 * gene_indices_to_remove, , drop=F] + infercnv_obj@gene_order <- infercnv_obj@gene_order[ -1 * gene_indices_to_remove, , drop=FALSE] validate_infercnv_obj(infercnv_obj) @@ -396,4 +472,6 @@ get_cell_name_by_grouping <- function(infercnv_obj) { } - +has_reference_cells <- function(infercnv_obj) { + return(length(infercnv_obj@reference_grouped_cell_indices) != 0) +} diff --git a/R/inferCNV_BayesNet.R b/R/inferCNV_BayesNet.R new file mode 100755 index 00000000..51856bc3 --- /dev/null +++ b/R/inferCNV_BayesNet.R @@ -0,0 +1,1181 @@ +#!/usr/bin/env Rscript + +################################## +# create MCMC_inferCNV S4 object # +################################## +#' MCMC_inferCNV class +#' +#' @description Uses Markov Chain Monte Carlo (MCMC) and Gibbs sampling to estimate the posterior +#' probability of being in one of six Copy Number Variation states (states: 0, 0.5, 1, 1.5, 2, 3) for CNV's identified by +#' inferCNV's HMM. Posterior probabilities are found for the entire CNV cluster and each individual +#' cell line in the CNV. +#' +#' @slot bugs_model BUGS model. 
+#' @slot sig fitted values for cell lines, 1/standard deviation to be used for determining the distribution of each cell line +#' @slot mu Mean values to be used for determining the distribution of each cell line +#' @slot group_id ID's given to the cell clusters. +#' @slot cell_gene List containing the Cells and Genes that make up each CNV. +#' @slot mcmc Simulation output from sampling. +#' @slot combined_mcmc Combined chains for simulation output from sampling. +#' @slot cnv_probabilities Probabilities of each CNV belonging to a particular state from 0 (least likely)to 1 (most likely). +#' @slot cell_probabilities Probabilities of each cell being in a particular state, from 0 (least likely)to 1 (most likely). +#' @slot args Input arguments given by the user +#' @slot cnv_regions ID for each CNV found by the HMM +#' @slot States States that are identified and (depending on posterior MCMC input methods) modified. +#' +#' +#' +#' @return Returns a MCMC_inferCNV_obj +#' @export +#' +# Requires: +# infercnv, rjags, ggplot2, parallel, futile.logger, reshape +## build off of the present S4 object inferCNV_obj to add more slots +MCMC_inferCNV <- setClass("MCMC_inferCNV", slots = c(bugs_model = "character", + sig = "numeric", + mu = "numeric", + group_id = "integer", + cell_gene = "list", + mcmc = "list", + cnv_probabilities = "list", + cell_probabilities = "list", + args = "list", + cnv_regions = "factor", + States = "ANY", + combined_mcmc = "list"), + contains = "infercnv") + + + + +############# +# Accessors # +############# + +#' Access the values for cellGene +#' +#' This function returns the list of values in cellGene +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' +#' @return A list. 
+#' +#' @exportMethod cellGene +#' @rdname cellGene-method +#' +setGeneric(name = "cellGene", + def = function(obj) standardGeneric("cellGene")) +#' @rdname cellGene-method +#' @aliases cellGene +#' +setMethod(f = "cellGene", + signature = "MCMC_inferCNV", + definition=function(obj) obj@cell_gene) + + +####################### +# Object Manipulation # +####################### +#' +#' Get the cell Mean and Standard Deviation for identified cnv regions +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' +#' @return obj The MCMC_inferCNV_obj S4 object. +#' +#' @exportMethod MeanSD +#' @rdname MeanSD-method +#' +setGeneric(name="MeanSD", + def=function(obj) + { standardGeneric("MeanSD") } +) + +#' @rdname MeanSD-method +#' @aliases MeanSD +#' +setMethod(f="MeanSD", + signature="MCMC_inferCNV", + definition=function(obj) + { + gene_expr_by_cnv = .get_gene_expr_by_cnv(obj@.hspike) + cnv_mean_sd = .get_gene_expr_mean_sd_by_cnv(gene_expr_by_cnv) + cnv_sd <- cbind(lapply(cnv_mean_sd,function(x){x$sd})) + cnv_mean <- cbind(lapply(cnv_mean_sd,function(x){x$mean})) + ## Sort so in order of {x0,...,x1,..,x3} and get into a vector format + obj@mu <- unlist(cbind(cnv_mean[sort(row.names(cnv_mean)),])) + obj@sig <- unlist(cbind(cnv_sd[sort(row.names(cnv_sd)),])) + obj@sig <- 1/(obj@sig^2) + if (obj@args$quietly == FALSE) { + print(paste("Means: ", obj@mu, collapse = "")) + print(paste("Sig: ", obj@sig, collapse = "")) + } + return(obj) + } +) + +#' Add the probability threshold for the arguments in the MCMC infercnv object. +#' +#' This function adds the variable BayesMaxPNormal to the arguments slot of the the MCMC infercnv object. +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' @param BayesMaxPNormal probability to be used as a threshold for CNV or cell removal. +#' +#' @return MCMC_inferCNV_obj S4 object. 
+#' +#' @exportMethod setBayesMaxPNormal +#' @rdname setBayesMaxPNormal-method +#' +setGeneric(name = "setBayesMaxPNormal", + def = function(obj, BayesMaxPNormal) standardGeneric("setBayesMaxPNormal")) + +#' @rdname setBayesMaxPNormal-method +#' @aliases setBayesMaxPNormal +#' +setMethod(f = "setBayesMaxPNormal", + signature = "MCMC_inferCNV", + definition=function(obj, BayesMaxPNormal) { + obj@args$BayesMaxPNormal <- BayesMaxPNormal + return(obj) + }) + +#' Create a list that holds Genes and Cells for each separate identified CNV +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' @param pred_cnv_genes_df Data for genes in each predicted CNV. +#' @param cell_groups_df Data for each cell in the predicted CNV's. +#' +#' @return obj The MCMC_inferCNV_obj S4 object. +#' +#' @exportMethod getGenesCells +#' @rdname getGenesCells-method +#' +setGeneric(name="getGenesCells", + def=function(obj, pred_cnv_genes_df, cell_groups_df) + { standardGeneric("getGenesCells") } +) + +#' @rdname getGenesCells-method +#' @aliases getGenesCells +#' +setMethod(f="getGenesCells", + signature="MCMC_inferCNV", + definition=function(obj, pred_cnv_genes_df, cell_groups_df) + { + ## list that holds Genes and Cells for each separate identified CNV + obj@cell_gene <- lapply(obj@cnv_regions,function(i) { + # subset the data to get the rows for the current CNV + current_cnv <- pred_cnv_genes_df[which(i == pred_cnv_genes_df$gene_region_name),] + # get the index for the genes that are in each cnv + genes <- current_cnv$gene + # pred_cnv_genes_df[which(pred_cnv_genes_df$gene_region_name %in% i),]$gene + gene_idx <- which(row.names(obj@expr.data) %in% genes) + + # get the index for the cells that are in each cnv + sub_cells <- unique(current_cnv$cell_group_name) + cells_idx <- which(colnames(obj@expr.data) %in% cell_groups_df[which(cell_groups_df$cell_group_name %in% sub_cells),]$cell) + return(list("cnv_regions" = i, "Genes" = gene_idx, "Cells" = cells_idx)) + }) + return(obj) + } +) + + + + +#' 
Initialize the MCMC_inferCNV_obj object +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' @param args_parsed The arguments given to the function. +#' @param infercnv_obj InferCNV object. +#' +#' @return obj The MCMC_inferCNV_obj S4 object. +#' +#' @exportMethod initializeObject +#' @rdname initializeObject-method +#' +setGeneric(name="initializeObject", + def=function(obj, args_parsed, infercnv_obj) + { standardGeneric("initializeObject") } +) + +#' @rdname initializeObject-method +#' @aliases initializeObject +#' +setMethod(f="initializeObject", + signature="MCMC_inferCNV", + definition=function(obj, args_parsed, infercnv_obj) + { + futile.logger::flog.info(paste("Initializing new MCM InferCNV Object.")) + files <- list.files(args_parsed$file_dir, full.names = TRUE) + + # Validate the inferCNV Object + validate_infercnv_obj(infercnv_obj) + + ## create the S4 object + obj <- MCMC_inferCNV(infercnv_obj) + ## add the command line arguments + obj@args <- args_parsed + + ## Load the files for cnv predictions + cell_groups_PATH <- files[grep(files, pattern = "_HMM_preds.cell_groupings")] + pred_cnv_genes_PATH <- files[grep(files, pattern = "_HMM_preds.pred_cnv_genes.dat")] + cell_groups_df <- read.table(cell_groups_PATH, header = T, check.names = FALSE, sep="\t") + pred_cnv_genes_df <- read.table(pred_cnv_genes_PATH, header = T, check.names = FALSE, sep="\t") + + # cnv region id's + obj@cnv_regions <- unique(pred_cnv_genes_df$gene_region_name) + futile.logger::flog.info(paste("Total CNV's: ", length(obj@cnv_regions))) + + ## Load Mixture Model File + futile.logger::flog.info(paste("Loading BUGS Model.")) + obj@bugs_model <- readChar(obj@args$model_file,file.info(obj@args$model_file)$size) + + ## list that holds Genes and Cells for each separate identified CNV + obj <- getGenesCells(obj, pred_cnv_genes_df, cell_groups_df) + + # Create numerical ids for each subgroup of cells + ## group name ids + cell_group_id <- unique(pred_cnv_genes_df$cell_group_name) + + ## set 
numerical id's for cell groups and set values in a vector for cell positions in the matrix + group_id <- rep(NA, max(unlist(obj@observation_grouped_cell_indices))) + lapply(1:length(cell_group_id), function(i) { + ## cells in the cluster group + cells <- cell_groups_df[cell_groups_df$cell_group_name %in% cell_group_id[i],]$cell + ## set the numerical id in the vector + group_id[which(colnames(obj@expr.data) %in% cells)] <<- i + }) + obj@group_id <- group_id + + return(obj) + } +) + + +#' Get the state values from the inferCNV HMM object +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' @param HMM_obj The HMM inferCNV object. +#' +#' @return obj The MCMC_inferCNV_obj S4 object. +#' +#' @exportMethod getStates +#' @rdname getStates-method +#' +setGeneric(name="getStates", + def=function(obj, HMM_obj) + { standardGeneric("getStates") } +) + +#' @rdname getStates-method +#' @aliases getStates +#' +setMethod(f="getStates", + signature="MCMC_inferCNV", + definition=function(obj, HMM_obj) + { + # Add the HMM defined states + x <- HMM_obj@expr.data + obj@States <- x + return(obj) + } +) + + +#' Set the probabilities for each CNV belonging to each state as well as probability of each cell belonging to a states +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' +#' @return obj The MCMC_inferCNV_obj S4 object. 
+#' +#' @exportMethod getProbabilities +#' @rdname getProbabilities-method +#' +setGeneric(name="getProbabilities", + def=function(obj) + { standardGeneric("getProbabilities") } +) + +#' @rdname getProbabilities-method +#' @aliases getProbabilities +#' +setMethod(f="getProbabilities", + signature="MCMC_inferCNV", + definition=function(obj) + { + ## List holding state probabilities for each CNV + cnv_probabilities <- list() + ## List for combining the chains in each simulation + combined_samples <- list() + ## list holding the frequency of epsilon values for each cell line + ## for each cnv region and subgroup + cell_probabilities <- list() + + combinedMCMC <- + for(j in 1:length(obj@mcmc)){ + # combine the chains + obj@combined_mcmc[[j]] <- do.call(rbind, obj@mcmc[[j]]) + # run function to get probabilities + ## Thetas + cnv_probabilities[[j]] <- cnv_prob(obj@combined_mcmc[[j]]) + ## Epsilons + cell_probabilities[[j]] <- cell_prob(obj@combined_mcmc[[j]]) + } + + obj@cnv_probabilities <- cnv_probabilities + obj@cell_probabilities <- cell_probabilities + return(obj) + } +) + +#' Run simulations in Parallel +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' +#' @return obj The MCMC_inferCNV_obj S4 object. 
+#' +#' @exportMethod withParallel +#' @rdname withParallel-method +#' +setGeneric(name="withParallel", + def=function(obj) + { standardGeneric("withParallel") } +) + +#' @rdname withParallel-method +#' @aliases withParallel +#' +setMethod(f="withParallel", + signature="MCMC_inferCNV", + definition=function(obj) + { + par_func <- function(i){ + if (obj@args$quietly == FALSE) { + futile.logger::flog.info(paste("Sampleing Number: ", i)) + } + if(!(length(obj@cell_gene[[i]]$Cells) == 0)){ + tumor_grouping <- obj@group_id[ obj@cell_gene[[i]]$Cells ] # subset the tumor ids for the cells wanted + gene_exp <- obj@expr.data[obj@cell_gene[[i]]$Genes, obj@cell_gene[[i]]$Cells] + return(run_gibb_sampling(gene_exp, obj)) + } else { + return(list(NULL)) + } + } + futile.logger::flog.info(paste("Running Sampling Using Parallel with ", obj@args$CORES, "Cores")) + obj@mcmc <- parallel::mclapply(1:length(obj@cell_gene), + FUN = par_func, + mc.cores = as.integer(obj@args$CORES)) + return(obj) + } +) + +#' Run simulations in Non-Parallel mode +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' +#' @return obj The MCMC_inferCNV_obj S4 object. +#' +#' @exportMethod nonParallel +#' @rdname nonParallel-method +#' +setGeneric(name="nonParallel", + def=function(obj) + { standardGeneric("nonParallel") } +) + +#' @rdname nonParallel-method +#' @aliases nonParallel +#' +setMethod(f="nonParallel", + signature="MCMC_inferCNV", + definition=function(obj) + { + futile.logger::flog.info(paste("Running Gibbs sampling in Non-Parallel Mode.")) + # Iterate over the CNV's and run the Gibbs sampling. 
+ obj@mcmc <- lapply(1:length(obj@cell_gene), function(i){ + if (obj@args$quietly == FALSE) { + futile.logger::flog.info(paste("Sample Number: ", i)) + } + if(!(length(obj@cell_gene[[i]]$Cells) == 0)){ + tumor_grouping <- obj@group_id[ obj@cell_gene[[i]]$Cells ] # subset the tumor ids for the cells wanted + gene_exp <- obj@expr.data[obj@cell_gene[[i]]$Genes, obj@cell_gene[[i]]$Cells] + return(run_gibb_sampling(gene_exp, obj)) + } else { + return(list(NULL)) + } + }) + return(obj) + } +) + +#' Run simulations and remove CNV's that have a probability of being normal above a set thresholld. +#' This removes possible false posotives identified by the HMM. +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' +#' @return obj The MCMC_inferCNV_obj S4 object. +#' +#' @exportMethod removeCNV +#' @rdname removeCNV-method +#' +setGeneric(name="removeCNV", + def=function(obj) + { standardGeneric("removeCNV") } +) + +#' @rdname removeCNV-method +#' @aliases removeCNV +#' +setMethod(f="removeCNV", + signature="MCMC_inferCNV", + definition=function(obj) + { + # Mean values of the probability distribution of the CNV states p(CNV == {states 1:6}) + cnv_means <- sapply(obj@cnv_probabilities,function(i) colMeans(i)) + futile.logger::flog.info(paste("Attempting to removing CNV(s) with a probability of being normal above ", obj@args$BayesMaxPNormal)) + futile.logger::flog.info(paste("Removing ",length(which(cnv_means[3,] > obj@args$BayesMaxPNormal)), " CNV(s) identified by the HMM.")) + if (any(cnv_means[3,] > obj@args$BayesMaxPNormal)){ + remove_cnv <- which(cnv_means[3,] > obj@args$BayesMaxPNormal) + + if (obj@args$quietly == FALSE) { print("CNV's being removed have the following posterior probabilities of being a normal state: ") } + + lapply(remove_cnv, function(i) { + if (obj@args$quietly == FALSE) { + print( paste(obj@cell_gene[[i]]$cnv_regions, ", Genes: ", length(obj@cell_gene[[i]]$Genes), " Cells: ", length(obj@cell_gene[[i]]$Cells)) ) + # print(paste(paste( "Probabilities: 
"), cnv_means[,i])) + } + ## Change the states to normal states + obj@States[obj@cell_gene[[i]]$Genes , obj@cell_gene[[i]]$Cells ] <<- 3 + }) + ## Remove the CNV's from the following matrices + obj@cell_gene <- obj@cell_gene[-remove_cnv] + obj@cell_probabilities <- obj@cell_probabilities[-remove_cnv] + obj@cnv_probabilities <- obj@cnv_probabilities[-remove_cnv] + cnv_means <- cnv_means[,-remove_cnv] + # obj@mcmc <- obj@mcmc[-remove_cnv] + # obj@combined_mcmc <- obj@combined_mcmc[-remove_cnv] + futile.logger::flog.info(paste("Total CNV's after removing: ", length(obj@cell_gene))) + } + + # Write the state probabilities for each CNV to a table. + ## set column names to the CNV ID + cnv_regions <- sapply(obj@cell_gene, function(i) { as.character(i$cnv_regions) }) + colnames(cnv_means) <- cnv_regions + ## set row names to the states 1:6 + row.names(cnv_means) <- c(sprintf("State:%s",1:6)) + write.table(cnv_means,file = file.path(obj@args$out_dir, "CNV_State_Probabilities.dat"), col.names = TRUE, row.names=TRUE, quote=FALSE, sep="\t") + return(obj) + } +) + +#' Run simulations and remove cells from cnv's that are predicted to be normal +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' +#' @return obj The MCMC_inferCNV_obj S4 object. 
+#' +#' @exportMethod removeCells +#' @rdname removeCells-method +#' +setGeneric(name="removeCells", + def=function(obj) + { standardGeneric("removeCells") } +) + +#' @rdname removeCells-method +#' @aliases removeCells +#' +setMethod(f="removeCells", + signature="MCMC_inferCNV", + definition=function(obj) + { + if (any(do.call(cbind, obj@cell_probabilities)[3,] > obj@args$BayesMaxPNormal)){ + lapply(1:length(obj@cell_probabilities), function(i) { + idx <- which(obj@cell_probabilities[[i]][3,] > obj@args$BayesMaxPNormal) + if(length(idx) > 0){ + ## change the states to normal states + obj@States[ obj@cell_gene[[i]]$Genes , obj@cell_gene[[i]]$Cells[ idx ] ] <<- 3 + ## remove these cells from the cnv + obj@cell_gene[[i]]$Cells <<- obj@cell_gene[[i]]$Cells[- idx] + } + }) + # recursively run again + obj <- runMCMC(obj) + } + return(obj) + } +) + +#' Run simulations using rjags. +#' +#' Run MCMC simulations using rjags. Also returns a plot the probability of each CNV being +#' normal before running any kind of post MCMC modification. +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' +#' @return obj The MCMC_inferCNV_obj S4 object. +#' +#' @exportMethod runMCMC +#' @rdname runMCMC-method +#' +setGeneric(name="runMCMC", + def=function(obj) + { standardGeneric("runMCMC") } +) + +#' @rdname runMCMC-method +#' @aliases runMCMC +#' +setMethod(f="runMCMC", + signature="MCMC_inferCNV", + definition=function(obj) + { + # Run MCMC + if(obj@args$CORES == 1){ + obj <- nonParallel(obj) + } else { + obj <- withParallel(obj) + } + + # Get the probability of of each cell line and complete CNV belonging to a specific state + obj <- getProbabilities(obj) + + return(obj) + } +) + + +########################## +# Plotting functions # +########################## +#' Get the probability of each cnv being a normal state and plot these probabilities. +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' @param PNormal Option to add specific title to plot. 
+#' +#' @return obj The MCMC_inferCNV_obj S4 object. +#' +#' @exportMethod postProbNormal +#' @rdname postProbNormal-method +#' +setGeneric(name="postProbNormal", + def=function(obj, PNormal) + { standardGeneric("postProbNormal") } +) + +#' @rdname postProbNormal-method +#' @aliases postProbNormal +#' +setMethod(f="postProbNormal", + signature="MCMC_inferCNV", + definition=function(obj, PNormal) + { + if (obj@args$plotingProbs == TRUE){ + # get probability of the cnv's belonging to each state + cnv_means <- sapply(obj@cnv_probabilities,function(i) colMeans(i)) + # Adjust the probabilities so greater probability corresponds to less likely to be normal + normal_prob <- 1 - cnv_means[3,] + obj@expr.data[,] <- 0 + lapply(1:length(normal_prob), function(i) { + ## change the states to normal states + obj@expr.data[obj@cell_gene[[i]]$Genes , obj@cell_gene[[i]]$Cells ] <<- normal_prob[i] + }) + if (!is.null(PNormal)){ + title <- sprintf(" (1 - Probabilities of Normal) With Threshold %s",obj@args$BayesMaxPNormal) + }else{ + title <- sprintf(" (1 - Probabilities of Normal) Before Filtering") + } + infercnv::plot_cnv(infercnv_obj = obj, + #k_obs_groups = 4, + #cluster_by_groups = cluster_by_groups, + title = title, + output_filename = file.path(file.path(obj@args$out_dir),"infercnv.NormalProbabilities"), + write_expr_matrix = FALSE, + x.center = 0, + x.range = c(0,1) + ) + } + } +) + +#' Plots the probability for each cnv belonging to a specific state and the probability of +#' each cell line belonging to a specific states. +#' +#' @param obj The MCMC_inferCNV_obj S4 object. +#' +#' @return obj The MCMC_inferCNV_obj S4 object. 
#'
#' @exportMethod plotProbabilities
#' @rdname plotProbabilities-method
#'
setGeneric(name="plotProbabilities",
           def=function(obj)
           { standardGeneric("plotProbabilities") }
)

#' @rdname plotProbabilities-method
#' @aliases plotProbabilities
#'
setMethod(f="plotProbabilities",
          signature="MCMC_inferCNV",
          definition=function(obj)
          {
              # Called for its side effect only: writes two multi-page PDFs
              # into obj@args$out_dir.
              if (obj@args$plotingProbs == TRUE){
                  futile.logger::flog.info(paste("Creating Plots for CNV and cell Probabilities."))

                  # Open a PDF device, run `draw`, and always close the device,
                  # even when one of the plots fails.
                  write_pdf <- function(file_name, draw) {
                      grDevices::pdf(file = file.path(file.path(obj@args$out_dir), file_name), onefile = TRUE)
                      on.exit(grDevices::dev.off(), add = TRUE)
                      draw()
                      invisible(NULL)
                  }
                  # Put the threshold into the file name when one was given.
                  pdf_name <- function(stem) {
                      if (!is.null(obj@args$BayesMaxPNormal)) {
                          sprintf("%s.%s.pdf", stem, obj@args$BayesMaxPNormal)
                      } else {
                          sprintf("%s.pdf", stem)
                      }
                  }

                  ## plots the probability of each cell line being a particular state
                  write_pdf(pdf_name("cellProbs"), function() {
                      for (i in seq_along(obj@cell_probabilities)) {
                          print(plot_cell_prob(as.data.frame(obj@cell_probabilities[[i]]),
                                               as.character(obj@cell_gene[[i]]$cnv_regions)))
                      }
                  })

                  ## plots the probability of a cnv being a particular state
                  write_pdf(pdf_name("cnvProbs"), function() {
                      for (i in seq_along(obj@cnv_probabilities)) {
                          print(plot_cnv_prob(obj@cnv_probabilities[[i]],
                                              as.character(obj@cell_gene[[i]]$cnv_regions)))
                      }
                  })
              }
              invisible(NULL)
          }
)


#' Return the InferCNV Object with the new adjucted CNV's
#'
#' Returns Infercnv Object
#'
#' @param obj The MCMC_inferCNV_obj S4 object.
#' @param infercnv_obj Current inferCNV object that will be adjusted based on the results of the Bayesian Network Model.
#'
#' @return An inferCNV object
#'
#' @exportMethod returningInferCNV
#' @rdname returningInferCNV-method
#'
#' @export

setGeneric(name = "returningInferCNV",
           def = function(obj, infercnv_obj)
           { standardGeneric("returningInferCNV") }
)
#' @rdname returningInferCNV-method
#' @aliases returningInferCNV
#'
setMethod(f = "returningInferCNV",
          signature = "MCMC_inferCNV",
          definition=function(obj, infercnv_obj) {
              # Replace the expression matrix by the (possibly filtered) state matrix.
              infercnv_obj@expr.data <- obj@States
              return(infercnv_obj)
          }
)


#' Create Diagnostic Plots And Summaries.
#'
#' Create Diagnostic Plots And Summaries in order to determine if convergence has occured.
#'
#' @param obj The MCMC_inferCNV_obj S4 object.
#'
#' @return obj The MCMC_inferCNV_obj S4 object.
#'
#' @exportMethod mcmcDiagnosticPlots
#' @rdname mcmcDiagnosticPlots-method
#'
#'
setGeneric(name="mcmcDiagnosticPlots",
           def=function(obj)
           { standardGeneric("mcmcDiagnosticPlots") }
)

#' @rdname mcmcDiagnosticPlots-method
#' @aliases mcmcDiagnosticPlots
#'
#'
setMethod(f="mcmcDiagnosticPlots",
          signature="MCMC_inferCNV",
          definition=function(obj)
          {
              # Writes trace/density, autocorrelation and Gelman plots plus a
              # summary table PDF into obj@args$out_dir. Used for its side
              # effects; the value is that of the final dev.off().
              out_file <- function(file_name) file.path(file.path(obj@args$out_dir), file_name)
              log_step <- function(msg) {
                  if (obj@args$quietly == FALSE) { futile.logger::flog.info(paste(msg)) }
              }
              # Keep only the columns of one chain whose name contains `pattern`.
              keep_cols <- function(chain, pattern) {
                  chain[,grepl(pattern, colnames(chain))]
              }

              futile.logger::flog.info(paste("Creating Diagnostic Plots."))
              ###########################
              # trace and density plots
              ###########################
              #--------------------------------------
              # trace and density plots for each cnv
              #--------------------------------------
              ## get the theta values
              log_step("Plotting CNV Trace and Density Plots.")
              cnvMCMCList <- lapply(seq_along(obj@mcmc), function(i){
                  lapply(obj@mcmc[[i]], keep_cols, pattern = 'theta')
              })
              grDevices::pdf(file = out_file("CNVDiagnosticPlots.pdf"), onefile = TRUE)
              for (i in seq_along(cnvMCMCList)) {
                  plot(coda::mcmc.list(cnvMCMCList[[i]]))
              }
              grDevices::dev.off()

              #---------------------------------------
              # trace and density plots for each cell
              #---------------------------------------
              ## get the epsilon values
              log_step("Plotting Cell Trace and Density Plots.")
              cellMCMCList <- lapply(seq_along(obj@mcmc), function(i){
                  lapply(obj@mcmc[[i]], keep_cols, pattern = 'epsilon')
              })
              grDevices::pdf(file = out_file("CellDiagnosticPlots.pdf"), onefile = TRUE)
              for (i in seq_along(cellMCMCList)) {
                  plot(coda::mcmc.list(cellMCMCList[[i]]))
              }
              grDevices::dev.off()


              ###########################
              # Auto Correlation Plots
              ###########################
              #---------------------------------------
              # Auto Correlation for each CNV
              #---------------------------------------
              log_step("Plotting CNV Autocorrelation Plots.")
              grDevices::pdf(file = out_file("CNVautocorrelationPlots.pdf"), onefile = TRUE)
              for (i in seq_along(cnvMCMCList)) {
                  coda::autocorr.plot(coda::mcmc.list(cnvMCMCList[[i]]))
              }
              grDevices::dev.off()

              ###########################
              # Gelman Plots
              ###########################
              #---------------------------------------
              # Gelman for each CNV
              #---------------------------------------
              log_step("Plotting CNV Gelman Plots.")
              grDevices::pdf(file = out_file("CNVGelmanPlots.pdf"), onefile = TRUE)
              for (i in seq_along(cnvMCMCList)) {
                  coda::gelman.plot(coda::mcmc.list(cnvMCMCList[[i]]))
              }
              grDevices::dev.off()

              ###########################
              # Summary Tables
              ###########################
              log_step("Creating CNV Statistical Summary Tables.")
              # Build one summary table (mean, sd, quantiles, Geweke score) for
              # the parameters named in `w`. `y` (a caption) is currently
              # unused; it is kept so the helper's signature stays as it was.
              theta_table <- function(x,y,w){
                  stats_summary <- summary(x[[1]][,w])
                  mu <- unlist(stats_summary[[1]][,1])
                  stdev <- unlist(stats_summary[[1]][,2])
                  q2.5 <- unlist(stats_summary[[2]][,1])
                  q50 <- unlist(stats_summary[[2]][,3])
                  q97.5 <- unlist(stats_summary[[2]][,5])
                  gewek <- unlist(coda::geweke.diag(x[[1]][,w], frac1=0.1, frac2=0.5))[seq_along(w)]
                  df <- data.frame(mu,stdev,q2.5,q50,q97.5,gewek)
                  colnames(df) <- c('Mean','St.Dev','2.5%','50%','97.5%', "Geweke")
                  rownames(df) <- c(w)
                  #return(knitr::kable(df, caption = y))
                  return(df)
              }
              # List of statistical summary tables, one per CNV
              summary_table <- lapply(seq_along(obj@mcmc), function(i) {
                  title <- sprintf("CNV %s Summary Table", obj@cell_gene[[i]]$cnv_regions)
                  thetas <- lapply(obj@mcmc[[i]], keep_cols, pattern = 'theta')
                  w <- row.names(summary(coda::as.mcmc(thetas))[[1]])
                  return(theta_table(coda::as.mcmc(thetas), title, w))
              })
              # Theme for the grob tables
              theme.1 <- gridExtra::ttheme_default(core = list(fg_params = list(parse=TRUE, cex = 0.5)),
                                                   colhead = list(fg_params=list(parse=TRUE, cex = 0.5)),
                                                   rowhead = list(fg_params=list(parse=TRUE, cex = 0.5)))
              # List of tables, table for each CNV
              plot_list <- lapply(seq_along(summary_table), function(i) {
                  ## Create table grob object
                  table <- gridExtra::tableGrob(summary_table[[i]],rows = c("State 1","State 2","State 3","State 4","State 5","State 6"), theme = theme.1)
                  ## Create the title for the table as a separate grob object
                  title <- sprintf("%s CNV Summary Table", obj@cell_gene[[i]]$cnv_regions)
                  title <- gridExtra::tableGrob(summary_table[[i]][1,1],rows=NULL, cols=c(title))
                  ## Combine the summary table grob and the title grob
                  tab <- gridExtra::gtable_combine(title[1,], table, along=2)
                  # Adjust the position of the title
                  tab$layout[1, c("l", "r")] <- c(7, 2)
                  tab$layout[2, c("l", "r")] <- c(7, 2)
                  return(tab)
              })
              # Combine all the tables together as one column
              test <- gridExtra::gtable_combine(plot_list, along = 2)
              # Save the tables to a PDF document
              grDevices::pdf(file = out_file("CNVSummaryTablels.pdf") , paper = "a4", onefile = TRUE, height = 0, width = 0)
              print(gridExtra::marrangeGrob(grobs = test, nrow = 5, ncol = 1))
              grDevices::dev.off()
          }
)



##########################
# Command line arguments #
##########################
# NOTE(review): this parser is built at file-evaluation time, and every option
# combines action='store_true' with type/metavar and the optparse-style
# "%default" placeholder. Python's argparse does not accept `type`/`metavar`
# together with store_true, so parsing is expected to fail. Confirm the
# intended behaviour before enabling the CLI block at the end of this file.
# pargs <- optparse::OptionParser()
pargs <- argparse::ArgumentParser()
pargs$add_argument(c("-f", "--infercnv_dir"),
                   type="character",
                   action='store_true',
                   dest="file_dir",
                   metavar="File_Directory",
                   help=paste("Path to files created by inferCNV.",
                              "[Default %default][REQUIRED]"))
pargs$add_argument(c("-m", "--model"),
                   type="character",
                   action='store_true',
                   dest="model_file",
                   metavar="Model_File_Path",
                   help=paste("Path to the BUGS Model file.",
                              "[Default %default][REQUIRED]"))
pargs$add_argument(c("-p","--parallel"),
                   type="character",
                   action='store_true',
                   dest="CORES",
                   default = NULL,
                   metavar="Number_of_Cores",
                   help=paste("Option to run parallel by specifying the number of cores to be used.",
                              "[Default %default]"))
pargs$add_argument(c("-o","--out_dir"),
                   type="character",
                   action='store_true',
                   dest="out_dir",
                   default = NULL,
                   metavar="Output_Directory",
                   help=paste("Option to set the output directory to save the outputs.",
                              "[Default %default]"))
pargs$add_argument(c("-M","--method"),
                   type="character",
                   action='store_true',
                   dest="postMcmcMethod",
                   default = NULL,
                   metavar="Posterior_MCMC_Method",
                   help=paste("What actions to take after finishing the MCMC.",
                              "[Default %default]"))
pargs$add_argument(c("-x","--plot"),
                   type="logical",
                   action='store_true',
                   dest="plotingProbs",
                   default = TRUE,
                   metavar="Plot_Probabilities",
                   help=paste("Plot the posterior probabilites for each CNV and each cell line in each cnv.",
                              "[Default %default]"))

# Function to run the mixture model for given expression data
# Runs on each cnv separately. Then separate by tumor subgroup.
# input:
#       gene_exp          : Gene expression data
#       MCMC_inferCNV_obj : MCMC_inferCNV object
# return:
#       samples : Results of the sampling process (6 chains, theta and epsilon monitored)
#
run_gibb_sampling <- function(gene_exp,
                              MCMC_inferCNV_obj
                              ){
    # A bare vector has no columns; wrap it so ncol()/nrow() are defined.
    if (is.null(ncol(gene_exp))){
        gene_exp <- data.frame(gene_exp)
    }
    C <- ncol(gene_exp)
    G <- nrow(gene_exp)
    quietly <- MCMC_inferCNV_obj@args$quietly
    if (quietly == FALSE) {
        futile.logger::flog.info(paste("Cells: ",C))
        futile.logger::flog.info(paste("Genes: ",G))
    }
    progress_bar <- if (quietly) "none" else "text"
    # make Data list for model
    data <- list(
        'C' = C,                                 # number of cell lines
        'G' = G,                                 # number of genes
        'gexp' = gene_exp,                       # expression data
        'sig' = MCMC_inferCNV_obj@sig,           # fitted values for cell lines, 1/standard deviation to be used for determining the distribution of each cell line
        'mu' = MCMC_inferCNV_obj@mu              # Mean values to be used for determining the distribution of each cell line
    )
    # one chain per state: chain k starts with every cell in state k
    inits <- lapply(1:6, function(state) list(epsilon = rep(state, C)))
    # Create the model for rjags
    model <- rjags::jags.model(textConnection(MCMC_inferCNV_obj@bugs_model),
                               data=data,
                               inits=inits, # (Initialization) optional specification of initial values in the form of a list or a function
                               n.chains=6,  # the number of parallel chains for the model
                               n.adapt=500, # the number of iterations for adaptation (burn in)
                               quiet=quietly)
    stats::update(model, 200, progress.bar=progress_bar)
    # run the rjags model
    ## set the parameters to return from sampling
    parameters <- c('theta', 'epsilon')
    samples <- rjags::coda.samples(model, parameters, n.iter=1000, progress.bar=progress_bar)
    return(samples)
}

# Function to plot the probability for each cell line of being in a particular state
# input:
#       df    : data frame with one row per state (6 rows) and one column per cell
#       title : plot title
plot_cell_prob <- function(df, title){
    df$mag <- c(1:6)
    long_data <- reshape::melt(df, id = "mag")
    long_data$mag <- as.factor(long_data$mag)
    ggplot2::ggplot(long_data, ggplot2::aes_string(x = 'variable', y = 'value', fill = 'mag'))+
        ggplot2::geom_bar(stat="identity", width = 1) +
        ggplot2::coord_flip() +
        ggplot2::theme(
            panel.grid = ggplot2::element_blank(), panel.background = ggplot2::element_blank(),panel.border = ggplot2::element_blank(),
            axis.text=ggplot2::element_text(size=20),
            plot.title = ggplot2::element_text(hjust = 0.5,size = 22),
            #legend.position = "none",
            legend.position="bottom",
            axis.text.x = ggplot2::element_text(size = 16),
            axis.text.y = ggplot2::element_text(size = 16),
            axis.title.x = ggplot2::element_text(size = 18),
            axis.title.y = ggplot2::element_text(size = 18))+
        ggplot2::labs(title = title) +
        #fill = "CNV States") +
        ggplot2::xlab("Cell") +
        ggplot2::ylab("Probability")+
        ggplot2::labs(fill = "States")+
        ggplot2::scale_x_discrete(breaks =seq(1, ncol(df), 9))
}

# Function for total CNV probability of belonging to each state using THETA prior
# Returns the columns of `combined_samples` whose name contains 'theta'.
cnv_prob <- function(combined_samples) {
    thetas <- combined_samples[,grepl('theta', colnames(combined_samples))]
    #print(paste("Thetas: ", dim(thetas)))
    return(thetas)
}

# Function for each individual cell probabilities, marginalize over the EPSILONS
# Returns a 6 x cells matrix: for every epsilon column, the fraction of samples
# spent in each of the states 1..6.
cell_prob <- function(combined_samples) {
    epsilons <- combined_samples[,grepl('epsilon', colnames(combined_samples))]
    #print(paste("Epsilons: ", dim(epsilons)))
    epsilon_state_frequencies <- apply(as.data.frame(epsilons), 2, function(x) table(factor(x, levels=1:6)))
    # Normalise each column by its own total. A plain `matrix / colSums(matrix)`
    # recycles the divisor down the rows and is only right when every column
    # happens to have the same total.
    cell_probs <- sweep(epsilon_state_frequencies, 2, colSums(epsilon_state_frequencies), "/")
    return(cell_probs)
}

## Function to plot the probability of each state for a CNV
## input:
##       df    : samples x 6 matrix of state probabilities
##       title : plot title
plot_cnv_prob <- function(df,title){
    colnames(df) <- c(1:6)
    df <- reshape::melt(df)
    colnames(df) <- c("row", "State", "Probability")
    states <- as.factor(df$State)
    ggplot2::ggplot(data = df, ggplot2::aes_string(y = 'Probability', x= 'State', fill = 'states')) +
        ggplot2::geom_boxplot()+
        ggplot2::labs(title = title) +
        ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
}


###############################################
# Main Function to run Bayesion Network Model #
###############################################
#' @title inferCNVBayesNet: Run Bayesian Network Mixture Model To Obtain Posterior Probabilities For HMM Predicted States
#'
#' @description Uses Markov Chain Monte Carlo (MCMC) and Gibbs sampling to estimate the posterior
#' probability of being in one of six Copy Number Variation states (states: 0, 0.5, 1, 1.5, 2, 3) for CNV's identified by
#' inferCNV's HMM. Posterior probabilities are found for the entire CNV cluster and each individual
#' cell line in the CNV.
#'
#' @param file_dir Location of the directory of the inferCNV outputs.
#' @param infercnv_obj InferCNV object.
#' @param HMM_obj InferCNV object with HMM states in expression data.
#' @param model_file Path to the BUGS Model file.
#' @param CORES Option to run parallel by specifying the number of cores to be used. (Default: 1)
#' @param out_dir (string) Path to where the output file should be saved to.
#' @param postMcmcMethod What actions to take after finishing the MCMC.
#' @param plotingProbs Option for adding plots of Cell and CNV probabilities. (Default: TRUE)
#' @param quietly Option to print descriptions along each step. (Default: TRUE)
#' @param diagnostics Option to plot Diagnostic plots and tables. (Default: FALSE)
#'
#' @return Returns a MCMC_inferCNV_obj and posterior probability of being in one of six Copy Number Variation states
#' (states: 0, 0.5, 1, 1.5, 2, 3) for CNV's identified by inferCNV's HMM.
#'
#' @export

inferCNVBayesNet <- function(
                             file_dir,
                             infercnv_obj,
                             HMM_obj,
                             out_dir,
                             model_file = system.file("BUGS_Mixture_Model",package = "infercnv"),
                             CORES = 1,
                             postMcmcMethod = NULL,
                             plotingProbs = TRUE,
                             quietly = TRUE,
                             diagnostics = FALSE) {

    ################
    # CHECK INPUTS #
    ################
    if (!file.exists(file_dir)){
        error_message <- paste("Cannot find the supplied directory location for the infercnv output.",
                               "Please supply teh correct path for the output.")
        futile.logger::flog.error(error_message)
        stop(error_message)
    }
    if (!file.exists(model_file)){
        error_message <- paste("Cannot find the model file.",
                               "Please supply the correct path for the model file.")
        futile.logger::flog.error(error_message)
        stop(error_message)
    }
    if (CORES != 1){
        available_cores <- parallel::detectCores()
        if (as.integer(CORES) > available_cores){
            error_message <- paste("Too many cores previded. The following system has ",available_cores, " cores.",
                                   "Please select an appropriate amount.")
            futile.logger::flog.error(error_message)
            stop(error_message)
        }
    }

    if (out_dir != "." && !file.exists(out_dir)){
        # create the output directory (and any missing parents)
        dir.create(file.path(out_dir), recursive = TRUE)
        futile.logger::flog.info(paste("Creating the following Directory: ", out_dir))
    }
    args_parsed <- list("file_dir" = file_dir,
                        "model_file" = model_file,
                        "CORES" = CORES,
                        "out_dir"= out_dir,
                        "plotingProbs" = plotingProbs, # honour the caller's choice
                        "postMcmcMethod"=postMcmcMethod,
                        "quietly" = quietly,
                        "BayesMaxPNormal" = 0)
    #################################
    # LOAD DATA & INITIALIZE OBJECT #
    #################################

    ## create the S4 object
    MCMC_inferCNV_obj <- new("MCMC_inferCNV")
    MCMC_inferCNV_obj <- initializeObject(MCMC_inferCNV_obj, args_parsed, infercnv_obj)
    MCMC_inferCNV_obj <- getStates(MCMC_inferCNV_obj, HMM_obj)

    #############
    # MEAN & SD #
    #############
    ## Get mean and sd of expression in the predicted cnv areas
    MCMC_inferCNV_obj <- MeanSD(MCMC_inferCNV_obj)


    # check and print the number of genes in each cnv
    if (args_parsed$quietly == FALSE) {
        number_of_genes <- vapply(cellGene(MCMC_inferCNV_obj), function(i) length(i$Genes), integer(1))
        print(paste("Number of genes for each CNV: ", paste(number_of_genes, sep = " ",collapse = " ")))
    }

    ################################
    # Run MCMC Sampling            #
    ################################
    ## Run Gibbs sampling and time the process
    start_time <- Sys.time()
    MCMC_inferCNV_obj <- runMCMC(MCMC_inferCNV_obj)
    end_time <- Sys.time()
    futile.logger::flog.info(paste("Gibbs sampling time: ", difftime(end_time, start_time, units = "min")[[1]], " Minutes"))

    ## Save the MCMC.infercnv_object as an RDS
    saveRDS(MCMC_inferCNV_obj, file = file.path(MCMC_inferCNV_obj@args$out_dir, "MCMC_inferCNV_obj.rds"))

    ########
    # Plot #
    ########
    if (diagnostics == TRUE){
        mcmcDiagnosticPlots(MCMC_inferCNV_obj)
    }
    postProbNormal(MCMC_inferCNV_obj,
                   PNormal = NULL)

    return(MCMC_inferCNV_obj)
}

#############################################################
# Function to modify CNV's identified base on probabilities #
#############################################################
#' @title filterHighPNormals: Filter the HMM identified CNV's by the CNV's posterior probability
#' of belonging to a normal state.
#'
#' @description The following function will filter the HMM identified CNV's by the CNV's posterior
#' probability of belonging to a normal state identified by the function inferCNVBayesNet(). Will filter
#' CNV's based on a user desired threshold probability. Any CNV with a probability of being normal above
#' the threshold will be removed.
#'
#' @param MCMC_inferCNV_obj MCMC infernCNV object.
#' @param BayesMaxPNormal Option to filter CNV or cell lines by some probability threshold.
#'
#' @return Returns a MCMC_inferCNV_obj With removed CNV's.
#'
#' @export

filterHighPNormals <- function( MCMC_inferCNV_obj,
                                BayesMaxPNormal) {
    MCMC_inferCNV_obj <- setBayesMaxPNormal( obj             = MCMC_inferCNV_obj,
                                             BayesMaxPNormal = BayesMaxPNormal )
    ## Either Remove CNV's based on CNV posterier probabilities ("removeCNV")
    ## or remove cell lines based on cell line posterior probabilities
    ## (any other non-NULL value). With no method set, nothing is filtered.
    post_method <- MCMC_inferCNV_obj@args$postMcmcMethod
    if (!is.null(post_method)) {
        if (post_method == "removeCNV") {
            MCMC_inferCNV_obj <- removeCNV(MCMC_inferCNV_obj)
        } else {
            MCMC_inferCNV_obj <- removeCells(MCMC_inferCNV_obj)
        }
    }

    ## Plot the results

    plotProbabilities(MCMC_inferCNV_obj)
    postProbNormal(MCMC_inferCNV_obj,
                   PNormal = TRUE)

    return(MCMC_inferCNV_obj)
}

##########################
# Command Line Arguments #
##########################
## Uncomment to use the command line arguments
# if (!is.null(args)){
#     # Set Constants
#     args_parsed <- optparse::parse_args(pargs)
#     file_dir <- args_parsed$file_dir
#     model_file <- args_parsed$model_file
#     CORES <- args_parsed$CORES
#     out_dir <- args_parsed$out_dir
#     inferCNVBayesNet(file_dir,
#                      model_file,
#                      CORES,
#                      out_dir)
# }

# ---- patch boundary: everything below belongs to R/inferCNV_HMM.R ----
# diff --git a/R/inferCNV_HMM.R b/R/inferCNV_HMM.R
# new file mode 100644
# index 00000000..15fa7c88
# --- /dev/null
# +++ b/R/inferCNV_HMM.R
# @@ -0,0 +1,1000 @@

#' @title get_spike_dists
#'
#' @description determines the N(mean,sd) parameters for each of the CNV states based on
#' the in silico spike in data (hspike).
#'
#' @param hspike_obj hidden spike object
#'
#' @return cnv_mean_sd list
#'
#' @export

get_spike_dists <- function(hspike_obj) {

    if (is.null(hspike_obj)) {
        error_message <- "get_spike_dists(hspike_obj): Error, hspike obj is null"
        flog.error(error_message)
        # Raise the same text that was logged rather than a bare "Error".
        stop(error_message, call. = FALSE)
    }
    # A non-NULL @.hspike slot means the argument does not look like an
    # hspike object. This is only logged; execution continues.
    # NOTE(review): confirm whether this case should abort as well.
    if (! is.null(hspike_obj@.hspike)) {
        flog.error("get_spike_dists() should have an hspike obj as param, and this doesn't look like an hspike")
    }

    gene_expr_by_cnv <- .get_gene_expr_by_cnv(hspike_obj)

    cnv_mean_sd <- .get_gene_expr_mean_sd_by_cnv(gene_expr_by_cnv)

    return(cnv_mean_sd)

}


#' @title .get_gene_expr_by_cnv
#'
#' @description builds a list containing all intensities corresponding to each of the
#' spiked-in cnv levels.
#'
#' @param hspike_obj
#'
#' @return gene_expr_by_cnv list, keyed on cnv level, value as vector of residual expr intensities
#'
#' @noRd

.get_gene_expr_by_cnv <- function(hspike_obj) {

    chr_info <- .get_hspike_chr_info(1,1) # dummy values for now

    spike_cell_idx <- unlist(hspike_obj@observation_grouped_cell_indices)

    # drop=FALSE keeps a matrix even for a single spike cell, so the row
    # subsetting below stays valid.
    spike.expr.data <- hspike_obj@expr.data[, spike_cell_idx, drop=FALSE]

    gene_expr_by_cnv <- list()
    for (info in chr_info) {
        cnv <- sprintf("cnv:%g", info$cnv)
        chr_gene_idx <- which(hspike_obj@gene_order$chr == info$name)
        gene.expr <- c(spike.expr.data[chr_gene_idx, , drop=FALSE])

        # Several chromosomes can share a cnv level: pool their values.
        # c(NULL, x) is x, so the first chromosome of a level needs no
        # special case.
        gene_expr_by_cnv[[cnv]] <- c(gene_expr_by_cnv[[cnv]], gene.expr)
    }

    return(gene_expr_by_cnv)
}




#' @title .get_gene_expr_mean_sd_by_cnv
#'
#' @description extracts the N(mean,sd) params for each of the cnv levels based
#' on the list of residual expr intensities for each cnv level
#'
#' @param gene_expr_by_cnv named list: cnv level -> numeric vector of intensities
#'
#' @return cnv_mean_sd list: cnv level -> list(mean=, sd=)
#'
#' @noRd

.get_gene_expr_mean_sd_by_cnv <- function(gene_expr_by_cnv) {

    cnv_mean_sd <- list()

    for (cnv_level in names(gene_expr_by_cnv)) {
        gene_expr <- gene_expr_by_cnv[[ cnv_level ]]

        cnv_mean_sd[[ cnv_level ]] <- list(mean=mean(gene_expr),
                                           sd=stats::sd(gene_expr))
    }

    return(cnv_mean_sd)

}


#' @title .plot_gene_expr_by_cnv
#'
#' @description generates a plot showing the residual expresion intensity distribution along with the
#' reference theoretical densities for each of the corresponding parameterized normal distributions.
#'
#' @param gene_expr_by_cnv list
#'
#' @param cnv_mean_sd list
#'
#' @return ggplot2_plot
#'
#' @noRd

.plot_gene_expr_by_cnv <- function(gene_expr_by_cnv, cnv_mean_sd) {

    # long format: one row per intensity, labelled with its cnv level
    df <- do.call(rbind, lapply(names(gene_expr_by_cnv), function(x) {
        data.frame(cnv=x, expr=gene_expr_by_cnv[[x]])
    }))

    # Map fill/colour to the `cnv` column. Passing quoted names to aes()
    # would map them to the constant string "cnv" and pool all levels into
    # a single density.
    p <- ggplot2::ggplot(df, ggplot2::aes_string(x='expr', fill='cnv', colour='cnv')) +
        ggplot2::geom_density(alpha=0.1)

    # overlay the theoretical normal density of each of the six cnv states
    for (cnv_level in c("cnv:0.01", "cnv:0.5", "cnv:1", "cnv:1.5", "cnv:2", "cnv:3")) {
        p <- p + ggplot2::stat_function(fun=stats::dnorm, color='black',
                                        args=list('mean'=cnv_mean_sd[[cnv_level]]$mean,
                                                  'sd'=cnv_mean_sd[[cnv_level]]$sd))
    }

    return(p)
}


#' @title get_hspike_cnv_mean_sd_trend_by_num_cells_fit
#'
#' @description determine the number of cells - to - variance fit for each of the cnv levels.
#'
#' Different numbers of cells are randomly selected from the distribution of residual intensitites at each
#' corresponding CNV level, the variance is computed, and a linear model is then fit.
#'
#' Note, this is similar to what is done in HoneyBadger, but has many differences to how they're doing it there,
#' which appears to involve cnv block length rather than cell number.  Here, block size is not relevant, but rather
#' the number of cells in a pre-defined tumor subcluster.  Also, values are extracted from our in silico spike-in.
#'
#' @param hspike_obj hidden spike object
#'
#' @param plot (boolean flag, default FALSE)
#'
#' @return cnv_level_to_mean_sd_fit list
#'
#' @export

get_hspike_cnv_mean_sd_trend_by_num_cells_fit <- function(hspike_obj, plot=FALSE) {

    gene_expr_by_cnv <- .get_gene_expr_by_cnv(hspike_obj)

    nrounds <- 100     # resampling rounds per group size
    max_ncells <- 100  # group sizes 1..max_ncells are simulated

    # For every cnv level: sd of the mean of `ncells` resampled intensities,
    # for ncells = 1..max_ncells.
    cnv_level_to_mean_sd <- list()
    for (cnv_level in names(gene_expr_by_cnv)) {
        expr_vals <- gene_expr_by_cnv[[ cnv_level ]]
        n_vals <- length(expr_vals)

        cnv_level_to_mean_sd[[ cnv_level ]] <- vapply(seq_len(max_ncells), function(ncells) {
            means <- vapply(seq_len(nrounds), function(i) {
                # Index-based resampling draws the same random numbers as
                # sample(expr_vals, ...), but stays correct when expr_vals
                # has length one.
                mean(expr_vals[sample.int(n_vals, size=ncells, replace=TRUE)])
            }, numeric(1))
            stats::sd(means)
        }, numeric(1))
    }

    if (plot) {
        df <- do.call(rbind, lapply(names(cnv_level_to_mean_sd), function(cnv_level) {
            sd_by_ncells <- cnv_level_to_mean_sd[[ cnv_level ]]
            data.frame(cnv=cnv_level, num_cells=seq_along(sd_by_ncells), sd=sd_by_ncells)
        }))

        # aes_string() takes strings; unquoted expressions would be evaluated
        # right here, where num_cells is undefined and sd is a function.
        p <- ggplot2::ggplot(df, ggplot2::aes_string(x='log(num_cells)', y='log(sd)', color='cnv')) +
            ggplot2::geom_point()
        grDevices::pdf("hspike_obs_mean_var_trend.pdf")
        flog.info("plotting: hspike_obs_mean_var_trend.pdf")
        plot(p)
        grDevices::dev.off()
    }

    ## fit linear model
    cnv_level_to_mean_sd_fit <- list()
    for (cnv_level in names(cnv_level_to_mean_sd) ) {
        # The names `sd_vals` and `num_cells` are part of the model formula;
        # predict(fit, newdata=data.frame(num_cells=...)) relies on them.
        sd_vals <- cnv_level_to_mean_sd[[ cnv_level ]]
        num_cells <- seq_along(sd_vals)

        flog.info(sprintf("fitting num cells vs. variance for cnv level: %s", cnv_level))
        fit <- stats::lm(log(sd_vals) ~ log(num_cells)) #note, hbadger does something similar, but not for the hmm cnv state levels
        cnv_level_to_mean_sd_fit[[ cnv_level ]] <- fit
    }

    return(cnv_level_to_mean_sd_fit)

}




#' @title .get_HMM
#'
#' @description retrieves parameters for the i6 HMM including state transition and state emission probabilities.
#'
#' @param cnv_mean_sd list containing the N(mean,sd) values per cnv state level
#'
#' @param t the probability for transitioning to a different state.
#'
#' @return HMM_info list
#'
#' @noRd


.get_HMM <- function(cnv_mean_sd, t) {

    ## states: 0, 0.5, 1, 1.5, 2, 3
    state_levels <- c("cnv:0.01", "cnv:0.5", "cnv:1", "cnv:1.5", "cnv:2", "cnv:3")

    ## probability t of moving to each other state, 1-5*t of staying put
    state_transitions <- matrix(t, nrow=6, ncol=6)
    diag(state_transitions) <- 1-5*t

    ## initial distribution: more likely normal (third state)
    delta <- rep(t, 6)
    delta[3] <- 1-5*t

    ## pull one field ('mean' or 'sd') for all six states, in state order
    collect <- function(field) {
        unlist(lapply(state_levels, function(level) cnv_mean_sd[[level]][[field]]))
    }

    list(state_transitions=state_transitions,
         delta=delta,
         state_emission_params=list(mean=collect("mean"), sd=collect("sd")))
}


#' @title predict_CNV_via_HMM_on_indiv_cells
#'
#' @description predict CNV levels at the individual cell level, using the i6 HMM
#'
#' @param infercnv_obj infercnv object
#'
#' @param cnv_mean_sd (optional, by default automatically computed based in the infercnv_obj@.hspike object)
#'
#' @param t HMM alt state transition probability (default=1e-6)
#'
#' @return infercnv_obj where the infercnv_obj@expr.data are replaced with the HMM state assignments.
#'
#' @export

predict_CNV_via_HMM_on_indiv_cells <- function(infercnv_obj, cnv_mean_sd=get_spike_dists(infercnv_obj@.hspike), t=1e-6) {

    flog.info("predict_CNV_via_HMM_on_indiv_cells()")

    HMM_info <- .get_HMM(cnv_mean_sd, t)

    expr.data <- infercnv_obj@expr.data
    gene_order <- infercnv_obj@gene_order

    ## same shape as the expression matrix, filled with an invalid state
    hmm.data <- expr.data
    hmm.data[,] <- -1

    ## every (chromosome, cell) pair gets its own Viterbi decoding
    for (chr in unique(gene_order$chr)) {
        chr_gene_idx <- which(gene_order$chr == chr)

        for (cell_idx in seq_len(ncol(expr.data))) {

            hmm <- HiddenMarkov::dthmm(x=as.vector(expr.data[chr_gene_idx,cell_idx]),
                                       Pi=HMM_info[['state_transitions']],
                                       delta=HMM_info[['delta']],
                                       distn="norm",
                                       pm=HMM_info[['state_emission_params']])

            ## the package-local adjusted Viterbi is used instead of HiddenMarkov::Viterbi
            hmm.data[chr_gene_idx,cell_idx] <- Viterbi.dthmm.adj(hmm)
        }
    }

    infercnv_obj@expr.data <- hmm.data

    return(infercnv_obj)

}


#' @title predict_CNV_via_HMM_on_tumor_subclusters
#'
#' @description predict CNV levels at the tumor subcluster level, using the i6 HMM
#'
#' @param infercnv_obj infercnv object
#'
#' @param cnv_mean_sd (optional, by default automatically computed based in the infercnv_obj@.hspike object)
#'
#' @param cnv_level_to_mean_sd_fit (optional, by default automatically computed based on get_hspike_cnv_mean_sd_trend_by_num_cells_fit(infercnv_obj@.hspike)
#'
#' @param t HMM alt state transition probability (default=1e-6)
#'
#' @return infercnv_obj where the infercnv_obj@expr.data are replaced with the HMM state assignments.
+#' +#' @export + +predict_CNV_via_HMM_on_tumor_subclusters <- function(infercnv_obj, + cnv_mean_sd=get_spike_dists(infercnv_obj@.hspike), + cnv_level_to_mean_sd_fit=get_hspike_cnv_mean_sd_trend_by_num_cells_fit(infercnv_obj@.hspike), + t=1e-6 + ) { + + + + + flog.info("predict_CNV_via_HMM_on_tumor_subclusters") + + if (is.null(infercnv_obj@tumor_subclusters)) { + flog.warn("No subclusters defined, so instead running on whole samples") + return(predict_CNV_via_HMM_on_whole_tumor_samples(infercnv_obj, cnv_mean_sd, cnv_level_to_mean_sd_fit, t)); + } + + + HMM_info <- .get_HMM(cnv_mean_sd, t) + + chrs = unique(infercnv_obj@gene_order$chr) + + expr.data = infercnv_obj@expr.data + gene_order = infercnv_obj@gene_order + hmm.data = expr.data + hmm.data[,] = -1 #init to invalid state + + tumor_subclusters <- unlist(infercnv_obj@tumor_subclusters[["subclusters"]], recursive=FALSE) + + ## add the normals, so they get predictions too: + tumor_subclusters <- c(tumor_subclusters, infercnv_obj@reference_grouped_cell_indices) + + ## run through each chr separately + lapply(chrs, function(chr) { + chr_gene_idx = which(gene_order$chr == chr) + + ## run through each cell for this chromosome: + lapply(tumor_subclusters, function(tumor_subcluster_cells_idx) { + + gene_expr_vals = rowMeans(expr.data[chr_gene_idx,tumor_subcluster_cells_idx,drop=FALSE]) + + num_cells = length(tumor_subcluster_cells_idx) + + state_emission_params <- .get_state_emission_params(num_cells, cnv_mean_sd, cnv_level_to_mean_sd_fit) + + hmm <- HiddenMarkov::dthmm(gene_expr_vals, + HMM_info[['state_transitions']], + HMM_info[['delta']], + "norm", + state_emission_params) + + ## hmm_trace <- HiddenMarkov::Viterbi(hmm) + hmm_trace <- Viterbi.dthmm.adj(hmm) + + hmm.data[chr_gene_idx,tumor_subcluster_cells_idx] <<- hmm_trace + }) + }) + + infercnv_obj@expr.data <- hmm.data + + flog.info("-done predicting CNV based on initial tumor subclusters") + + return(infercnv_obj) + +} + +#' @title 
#' predict_CNV_via_HMM_on_whole_tumor_samples
#'
#' @description predict CNV levels at the tumor sample level, using the i6 HMM
#'
#' @param infercnv_obj infercnv object
#'
#' @param cnv_mean_sd (optional, by default automatically computed based on the infercnv_obj@.hspike object)
#'
#' @param cnv_level_to_mean_sd_fit (optional, by default automatically computed based on get_hspike_cnv_mean_sd_trend_by_num_cells_fit(infercnv_obj@.hspike))
#'
#' @param t HMM alt state transition probability (default=1e-6)
#'
#' @return infercnv_obj where the infercnv_obj@expr.data are replaced with the HMM state assignments.
#'
#' @export


predict_CNV_via_HMM_on_whole_tumor_samples <- function(infercnv_obj,
                                                       cnv_mean_sd=get_spike_dists(infercnv_obj@.hspike),
                                                       cnv_level_to_mean_sd_fit=get_hspike_cnv_mean_sd_trend_by_num_cells_fit(infercnv_obj@.hspike),
                                                       t=1e-6
                                                       ) {

    flog.info("predict_CNV_via_HMM_on_whole_tumor_samples")

    HMM_info <- .get_HMM(cnv_mean_sd, t)

    gene_order <- infercnv_obj@gene_order
    expr.data <- infercnv_obj@expr.data

    ## state matrix with the same shape as expr.data; -1 marks "no state assigned"
    hmm.data <- expr.data
    hmm.data[,] <- -1

    ## observation groups plus the reference (normal) groups, so they get predictions too
    tumor_samples <- c(infercnv_obj@observation_grouped_cell_indices,
                       infercnv_obj@reference_grouped_cell_indices)

    ## every chromosome is decoded on its own, one sample at a time
    for (chr in unique(gene_order$chr)) {
        chr_gene_idx <- which(gene_order$chr == chr)

        for (tumor_sample_cells_idx in tumor_samples) {

            ## the HMM is run on the per-gene mean across the sample's cells
            gene_expr_vals <- rowMeans(expr.data[chr_gene_idx, tumor_sample_cells_idx, drop=FALSE])

            num_cells <- length(tumor_sample_cells_idx)

            state_emission_params <- .get_state_emission_params(num_cells, cnv_mean_sd, cnv_level_to_mean_sd_fit)

            hmm <- HiddenMarkov::dthmm(gene_expr_vals,
                                       HMM_info[['state_transitions']],
                                       HMM_info[['delta']],
                                       "norm",
                                       state_emission_params)

            ## the adjusted Viterbi is used in place of HiddenMarkov::Viterbi(hmm);
            ## the per-gene trace is written to every cell (column) of the sample
            hmm.data[chr_gene_idx, tumor_sample_cells_idx] <- Viterbi.dthmm.adj(hmm)
        }
    }

    infercnv_obj@expr.data <- hmm.data

    ## this message used to claim the run was based on tumor subclusters
    flog.info("-done predicting CNV based on whole tumor samples")

    return(infercnv_obj)
}


#' @title .get_state_emission_params
#'
#' @description Given a specified number of cells, determines the standard deviation for each of the cnv states
#' based on the linear model fit. For every cnv level the fit's prediction at num_cells is
#' exponentiated and used as that level's sd; the means are taken from cnv_mean_sd unchanged.
#'
#' @param num_cells number of cells in the tumor subcluster
#'
#' @param cnv_mean_sd list of cnv mean,sd values; must contain the six levels
#' cnv:0.01, cnv:0.5, cnv:1, cnv:1.5, cnv:2 and cnv:3 (an error is raised otherwise)
#'
#' @param cnv_level_to_mean_sd_fit linear model that was fit for each cnv state level
#'
#' @param plot boolean (default=FALSE)
#'
#' @return list(mean=, sd=), each a numeric vector of length six ordered by cnv level
#'
#' @noRd


.get_state_emission_params <- function(num_cells, cnv_mean_sd, cnv_level_to_mean_sd_fit, plot=FALSE) {

    ## cnv levels in the order in which they are emitted
    state_levels <- c("cnv:0.01", "cnv:0.5", "cnv:1", "cnv:1.5", "cnv:2", "cnv:3")

    ## a missing level would otherwise be dropped silently by c(), producing
    ## mean/sd vectors shorter than the number of HMM states
    missing_levels <- setdiff(state_levels, names(cnv_mean_sd))
    if (length(missing_levels) > 0) {
        stop(sprintf("cnv_mean_sd is missing entries for: %s",
                     paste(missing_levels, collapse=", ")))
    }

    for (cnv_level in names(cnv_mean_sd)) {
        fit <- cnv_level_to_mean_sd_fit[[cnv_level]]
        cnv_mean_sd[[cnv_level]]$sd <- exp(predict(fit, newdata=data.frame(num_cells=num_cells))[[1]])
    }

    if (plot) {
        .plot_cnv_mean_sd_for_num_cells(num_cells, cnv_mean_sd)
    }

    state_emission_params <- list(
        mean=vapply(state_levels, function(level) cnv_mean_sd[[level]]$mean, numeric(1), USE.NAMES=FALSE),
        sd=vapply(state_levels, function(level) cnv_mean_sd[[level]]$sd, numeric(1), USE.NAMES=FALSE))

    return(state_emission_params)
}



#' @title .plot_cnv_mean_sd_for_num_cells
#'
#' @description helper function for plotting the N(mean,sd) for each of the cnv states
#' The plot is written to a file 'state_emissions.{num_cells}-cells.pdf'
#'
#' @param num_cells number of cells. Only used to encode into the filename.
#'
#' @param cnv_mean_sd list containing the N(mean,sd) for each of the cnv states
#'
#' @return None
#'
#' @noRd

.plot_cnv_mean_sd_for_num_cells <- function(num_cells, cnv_mean_sd) {

    pdf(sprintf("state_emissions.%d-cells.pdf", num_cells))
    ## close the pdf device even when building or drawing the plot fails,
    ## so a failure here does not leave a device open
    on.exit(dev.off(), add=TRUE)

    ## one normal density curve per cnv state; note the 'cnv:0.01' state is
    ## labelled 'cnv:0' in the legend
    p <- ggplot(data.frame(x=c(0,3)), aes_string(x='x')) +
        stat_function(fun=dnorm,
                      args=list('mean'=cnv_mean_sd[["cnv:0.01"]]$mean,'sd'=cnv_mean_sd[["cnv:0.01"]]$sd),
                      aes(colour='cnv:0')) +
        stat_function(fun=dnorm,
                      args=list('mean'=cnv_mean_sd[["cnv:0.5"]]$mean,'sd'=cnv_mean_sd[["cnv:0.5"]]$sd),
                      aes(colour='cnv:0.5')) +
        stat_function(fun=dnorm,
                      args=list('mean'=cnv_mean_sd[["cnv:1"]]$mean,'sd'=cnv_mean_sd[["cnv:1"]]$sd),
                      aes(colour='cnv:1')) +
        stat_function(fun=dnorm,
                      args=list('mean'=cnv_mean_sd[["cnv:1.5"]]$mean,'sd'=cnv_mean_sd[["cnv:1.5"]]$sd),
                      aes(colour='cnv:1.5')) +
        stat_function(fun=dnorm,
                      args=list('mean'=cnv_mean_sd[["cnv:2"]]$mean,'sd'=cnv_mean_sd[["cnv:2"]]$sd),
                      aes(colour='cnv:2')) +
        stat_function(fun=dnorm,
                      args=list('mean'=cnv_mean_sd[["cnv:3"]]$mean,'sd'=cnv_mean_sd[["cnv:3"]]$sd),
                      aes(colour='cnv:3')) +
        scale_colour_manual("Function",
                            values=c("blue", "red", "green", "magenta", "orange", "cyan"),
                            breaks=c("cnv:0","cnv:0.5", "cnv:1", "cnv:1.5", "cnv:2", "cnv:3"))

    plot(p)

    invisible(NULL)
}



#' @title .compare_obs_vs_fit_sc_cnv_sd
#'
#' @description For each cnv level, pairs the sd stored in cnv_mean_sd with the sd
#' obtained by exponentiating the level's fit prediction at num_cells=1.
#'
#' @param infercnv_obj infercnv object (only used to compute the default arguments)
#'
#' @param cnv_mean_sd list of cnv mean,sd values
#'
#' @param cnv_level_to_mean_sd_fit model fit for each cnv state level
#'
#' @return data.frame with columns cnv, sd_obs and sd_pred; one row per cnv level
#'
#' @keywords internal
#' @noRd
#'


.compare_obs_vs_fit_sc_cnv_sd <- function(infercnv_obj,
                                          cnv_mean_sd=get_spike_dists(infercnv_obj@.hspike),
                                          cnv_level_to_mean_sd_fit=get_hspike_cnv_mean_sd_trend_by_num_cells_fit(infercnv_obj@.hspike)) {

    per_level_rows <- lapply(names(cnv_mean_sd), function(cnv_level) {
        sd_obs <- cnv_mean_sd[[cnv_level]]$sd
        sd_pred <- exp(predict(cnv_level_to_mean_sd_fit[[cnv_level]], newdata=data.frame(num_cells=1)))
        data.frame(cnv=cnv_level, sd_obs=sd_obs, sd_pred=sd_pred)
    })

    df <- do.call(rbind, per_level_rows)

    return(df)
}



#' @title get_predicted_CNV_regions
#'
#'
#' @description Given the infercnv_obj containing the HMM state assignments in the expr.data slot,
#' retrieves a list of CNV regions.
#'
#' @param infercnv_obj infercnv object
#'
#' @param by options("consensus", "subcluster", "cell"), determines the granularity at which to report
#' the CNV regions. Ideally, set to the same level at which the HMM predictions were performed.
#' When the object has no tumor subclusters defined, reporting is reset to "consensus".
#'
#' @return cnv_regions list; one entry per cell group, each a list with the elements
#' cell_group_name, cells, gene_regions and cnv_ranges
#'
#' @export

get_predicted_CNV_regions <- function(infercnv_obj, by=c("consensus", "subcluster", "cell")) {
    by <- match.arg(by)

    flog.info(sprintf("get_predicted_CNV_regions(%s)", by))

    if (is.null(infercnv_obj@tumor_subclusters)) {
        flog.warn("get_predicted_CNV_regions() - no subclusters defined, resetting reporting mode to consensus")
        by <- "consensus"
    }

    if (by == "consensus") {
        cell_groups <- infercnv_obj@observation_grouped_cell_indices
    } else if (by == "subcluster") {
        cell_groups <- unlist(infercnv_obj@tumor_subclusters[["subclusters"]], recursive=FALSE)
    } else if (by == "cell") {
        ## one single-cell group per observation cell
        cell_groups <- as.list(unlist(infercnv_obj@observation_grouped_cell_indices))
    } else {
        stop("Error, shouldn't get here ... bug")
    }

    cnv_regions <- list()

    ## running offset so that region names stay unique across cell groups
    cnv_counter_start <- 0
    for (cell_group_name in names(cell_groups)) {

        cell_group <- cell_groups[[cell_group_name]]

        flog.info(sprintf("-processing cell_group_name: %s, size: %d", cell_group_name, length(cell_group)))

        cell_group_mtx <- infercnv_obj@expr.data[, cell_group, drop=FALSE]

        state_consensus <- .get_state_consensus(cell_group_mtx)
        names(state_consensus) <- rownames(cell_group_mtx)

        cnv_gene_regions <- .define_cnv_gene_regions(state_consensus, infercnv_obj@gene_order, cnv_counter_start)
        cnv_ranges <- .get_cnv_gene_region_bounds(cnv_gene_regions)

        cnv_regions[[length(cnv_regions)+1]] <- list(cell_group_name=cell_group_name,
                                                     cells=colnames(cell_group_mtx),
                                                     gene_regions=cnv_gene_regions,
                                                     cnv_ranges=cnv_ranges)

        cnv_counter_start <- cnv_counter_start + length(cnv_gene_regions)
    }

    return(cnv_regions)
}


#' @title generate_cnv_region_reports
#'
#' @description writes the CNV region report files into out_dir, each named with
#' output_filename_prefix plus a suffix: .cell_groupings, .pred_cnv_regions.dat,
#' .pred_cnv_genes.dat and .genes_used.dat
#'
#' @param infercnv_obj infercnv object
#'
#' @param output_filename_prefix prefix for output filename
#'
#' @param out_dir output directory for report files to be written
#'
#' @param ignore_neutral_state numeric value representing the neutral state, which should be excluded from reporting (default: NA)
#'
#' @param by options("consensus", "subcluster", "cell"), determines the granularity at which to report
#' the CNV regions. Ideally, set to the same level at which the HMM predictions were performed.
#'
#' @return None (invisible NULL)
#'
#' @export



generate_cnv_region_reports <- function(infercnv_obj,
                                        output_filename_prefix,
                                        out_dir,
                                        ignore_neutral_state=NA,
                                        by=c("consensus", "subcluster", "cell") ) {

    ## resolve the default choice vector to a single mode; `by` is tested with
    ## `if (by == "cell")` below, which requires a length-one value
    by <- match.arg(by)

    cnv_regions <- get_predicted_CNV_regions(infercnv_obj, by)

    report_path <- function(suffix) {
        paste(out_dir, paste0(output_filename_prefix, suffix), sep="/")
    }

    ## cell clusters defined.
    cell_clusters_outfile <- report_path(".cell_groupings")

    cell_clusters_df <- do.call(rbind, lapply(cnv_regions, function(x) {
        data.frame(cell_group_name=x$cell_group_name, cell=x$cells)
    }))

    flog.info(sprintf("-writing cell clusters file: %s", cell_clusters_outfile))
    write.table(cell_clusters_df, file=cell_clusters_outfile, row.names=FALSE, quote=FALSE, sep="\t")

    ## regions DF:
    regions_outfile <- report_path(".pred_cnv_regions.dat")

    regions_df <- do.call(rbind, lapply(cnv_regions, function(x) {
        cbind(cell_group_name=x$cell_group_name, x$cnv_ranges)
    }))

    ## remove the neutral calls:
    if (! is.na(ignore_neutral_state)) {
        regions_df <- regions_df[regions_df$state != ignore_neutral_state, ]
    }
    flog.info(sprintf("-writing cnv regions file: %s", regions_outfile))
    write.table(regions_df, regions_outfile, row.names=FALSE, sep="\t", quote=FALSE)


    ## write the per-gene reports of cnv:
    if (by == "cell") {
        flog.warn("Note, HMM reporting is being done by 'cell', so this may use more memory, write more info to disk, take more time, ...")
    }
    gene_cnv_df <- lapply(cnv_regions, function(x) {
        cell_group_name <- x$cell_group_name
        gene_region_list <- x$gene_regions
        gene_region_df <- lapply(names(gene_region_list), function(gene_region_name) {
            df <- cbind(cell_group_name, gene_region_name, gene_region_list[[gene_region_name]])
            rownames(df) <- NULL
            return(df)
        })
        gene_region_df <- do.call(rbind, gene_region_df)
        ## remove state 3 = neutral
        ## NOTE(review): this filter is hard-coded and applies even when
        ## ignore_neutral_state is NA -- confirm whether it should follow
        ## ignore_neutral_state instead.
        gene_region_df <- gene_region_df[gene_region_df$state != 3,]
        return(gene_region_df)
    })
    gene_cnv_df <- do.call(rbind, gene_cnv_df)
    if (! is.na(ignore_neutral_state)) {
        gene_cnv_df <- gene_cnv_df[gene_cnv_df$state != ignore_neutral_state, ]
    }

    ## write output file:
    gene_cnv_outfile <- report_path(".pred_cnv_genes.dat")
    flog.info(sprintf("-writing per-gene cnv report: %s", gene_cnv_outfile))
    write.table(gene_cnv_df, gene_cnv_outfile, row.names=FALSE, sep="\t", quote=FALSE)

    ## write file containing all genes that were leveraged in the predictions:
    gene_order_outfile <- report_path(".genes_used.dat")
    flog.info(sprintf("-writing gene ordering info: %s", gene_order_outfile))
    write.table(infercnv_obj@gene_order, file=gene_order_outfile, quote=FALSE, sep="\t")

    ## a bare `return` here used to hand back the `return` function itself
    invisible(NULL)
}


#' @title .get_state_consensus
#'
#' @description gets the state consensus for each gene in the input matrix
#'
#' @param cell_group_matrix matrix of [genes,cells] for which to apply consensus operation at gene level across cells.
#'
#' @return vector containing consensus state assignments for genes.
#'
#' @noRd

.get_state_consensus <- function(cell_group_matrix) {

    ## most frequent value in one gene's row; on a tie the first entry in
    ## table() order wins
    most_frequent_state <- function(gene_states) {
        state_counts <- table(gene_states)
        names(state_counts)[order(state_counts, decreasing=TRUE)[1]]
    }

    consensus <- apply(cell_group_matrix, 1, most_frequent_state)

    ## table() names are character, so convert back to numeric states
    consensus <- as.numeric(consensus)

    return(consensus)
}


#' @title .define_cnv_gene_regions
#'
#' @description Given the state consensus vector and gene order info, defines cnv regions
#' based on consistent ordering and cnv state
#'
#' @param state_consensus state consensus vector
#'
#' @param gene_order the infercnv_obj@gene_order info
#'
#' @param cnv_region_counter number x where counting starts at x+1, used to provide unique region names.
#'
#' @return regions list containing the cnv regions defined.
#'
#' @details Genes are walked in gene_order row order within each chromosome; a new
#' region is started at the first gene of the chromosome and at every gene whose
#' state differs from the previous gene's state. Chromosomes with fewer than two
#' genes are skipped. Each region is a data.frame with one row per gene and the
#' columns state, gene, chr, start and end.
#'
#' @noRd

.define_cnv_gene_regions <- function(state_consensus, gene_order, cnv_region_counter) {

    regions <- list()

    gene_names <- rownames(gene_order)

    ## one-row data.frame for the gene at row `idx` of gene_order; name and
    ## coordinates are read from the same row so they cannot drift apart
    gene_entry <- function(idx, state) {
        pos <- gene_order[idx, , drop=TRUE]
        data.frame(state=state,
                   gene=gene_names[idx],
                   chr=pos$chr,
                   start=pos$start,
                   end=pos$stop)
    }

    for (chr in unique(gene_order$chr)) {
        gene_idx <- which(gene_order$chr == chr)

        ## skip only this chromosome; `break` here used to drop every
        ## chromosome that came after it as well
        if (length(gene_idx) < 2) {
            next
        }

        chr_states <- state_consensus[gene_idx]

        ## the first gene always opens a region
        cnv_region_counter <- cnv_region_counter + 1
        cnv_region_name <- sprintf("%s-region_%d", chr, cnv_region_counter)
        regions[[cnv_region_name]] <- gene_entry(gene_idx[1], chr_states[1])

        for (i in seq(2, length(gene_idx))) {
            ## coordinates of gene i itself (previously taken from gene i-1,
            ## which shifted every position by one gene)
            next_gene_entry <- gene_entry(gene_idx[i], chr_states[i])

            if (chr_states[i] != chr_states[i - 1]) {
                ## state transition: start new cnv region
                cnv_region_counter <- cnv_region_counter + 1
                cnv_region_name <- sprintf("%s-region_%d", chr, cnv_region_counter)
                regions[[cnv_region_name]] <- next_gene_entry
            } else {
                ## append gene to current cnv region
                regions[[cnv_region_name]] <- rbind(regions[[cnv_region_name]], next_gene_entry)
            }
        }
    }

    return(regions)
}


#' @title .get_cnv_gene_region_bounds
#'
#' @description Given the cnv regions list, defines a data table containing
#' the cnv region name, state, chr, start, and end value.
+#' +#' @param cnv_gene_regions +#' +#' @return data.frame containing the cnv region summary table +#' +#' @noRd + +.get_cnv_gene_region_bounds <- function(cnv_gene_regions) { + + bounds = do.call(rbind, lapply(names(cnv_gene_regions), function(x) { + cnv_name = x + df = cnv_gene_regions[[cnv_name]] + state = df$state[1] + chr = df$chr[1] + start = min(df$start) + end = max(df$end) + + return(data.frame(cnv_name, state, chr, start, end)) + }) ) + + rownames(bounds) <- NULL + + return(bounds) +} + + +#' @title Viterbi.dthmm.adj +#' +#' @description Viterbi method extracted from the HiddenMarkov package and modified +#' to use our scoring system. +#' +#' @param HiddenMarkov object +#' +#' @return vector containing the viterbi state assignments +#' +#' @noRd + +Viterbi.dthmm.adj <- function (object, ...){ + x <- object$x + + if (length(x) < 2) { + ## not enough run a trace on + return(3); # neutral state + } + + dfunc <- HiddenMarkov:::makedensity(object$distn) + n <- length(x) + m <- nrow(object$Pi) # transition matrix + nu <- matrix(NA, nrow = n, ncol = m) # scoring matrix + y <- rep(NA, n) # final trace + pseudocount = 1e-20 + + emissions <- matrix(NA, nrow = n, ncol = m) + + ## ############################### + ## restrict to constant variance to avoid nonsensical results: + + ## object$pm$sd = max(object$pm$sd) + object$pm$sd = median(object$pm$sd) # max is too high + + ## ############################### + + + ## init first row + + emission <- pnorm(abs(x[1]-object$pm$mean)/object$pm$sd, log.p=TRUE, lower.tail=FALSE) + emission <- 1 / (-1 * emission) + emission <- emission / sum(emission) + + emissions[1,] <- log(emission) + + nu[1, ] <- log(object$delta) + # start probabilities + emissions[1,] + + + #nu[1, ] <- log(object$delta) + # start probabilities + # dfunc(x=x[1], # mean expr val for gene_1 + # object$pm, # parameters (mean, sd) for norm dist + # HiddenMarkov:::getj(object$pn, 1), # NULL value + # log=TRUE) # returns p-values as log(p) + + logPi <- 
log(object$Pi) # convert transition matrix to log(p) + + for (i in 2:n) { + + matrixnu <- matrix(nu[i - 1, ], nrow = m, ncol = m) + + #nu[i, ] <- apply(matrixnu + logPi, 2, max) + + # dfunc(x=x[i], object$pm, getj(object$pn, i), + # log=TRUE) + + + emission <- pnorm(abs(x[i]-object$pm$mean)/object$pm$sd, log.p=TRUE, lower.tail=FALSE) + emission <- 1 / (-1 * emission) + emission <- emission / sum(emission) + + emissions[i, ] <- log(emission) + + nu[i, ] <- apply(matrixnu + logPi, 2, max) + emissions[i, ] + + } + if (any(nu[n, ] == -Inf)) + stop("Problems With Underflow") + + + ## traceback + y[n] <- which.max(nu[n, ]) + + for (i in seq(n - 1, 1, -1)) + y[i] <- which.max(logPi[, y[i + 1]] + nu[i, ]) + + return(y) +} + + +#' @title assign_HMM_states_to_proxy_expr_vals +#' +#' @description Replaces the HMM state assignments with the cnv levels they represent. +#' +#' @param infercnv_obj infercnv object +#' +#' @return infercnv_obj +#' +#' @export + +assign_HMM_states_to_proxy_expr_vals <- function(infercnv_obj) { + + expr.data = infercnv_obj@expr.data + + expr.data[expr.data == 1] <- 0 + expr.data[expr.data == 2] <- 0.5 + expr.data[expr.data == 3] <- 1 + expr.data[expr.data == 4] <- 1.5 + expr.data[expr.data == 5] <- 2 + expr.data[expr.data == 6] <- 3 + + infercnv_obj@expr.data <- expr.data + + return(infercnv_obj) + +} + diff --git a/R/inferCNV_constants.R b/R/inferCNV_constants.R index 543ff7e1..5ef49ea5 100755 --- a/R/inferCNV_constants.R +++ b/R/inferCNV_constants.R @@ -9,11 +9,16 @@ C_STOP <- "stop" C_HCLUST_METHODS <- c("ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid") C_OUTPUT_FORMAT <- c("pdf", "png") + +## also including some globals: +infercnv.env <- new.env() +infercnv.env$GLOBAL_NUM_THREADS <- 1 # default is single-threaded. 
+ + #' @importFrom grDevices col2rgb colorRampPalette dev.off pdf png rgb #' @importFrom graphics abline axis boxplot hist image layout lines mtext par plot points rect text title legend -#' @importFrom stats as.dendrogram as.dist cutree density dist filter median order.dendrogram quantile reorder sd complete.cases cor t.test p.adjust predict rnorm runif smooth.spline var wilcox.test -#' @importFrom utils flush.console read.table write.table tail -#' @importFrom binhf ansc +#' @importFrom stats as.dendrogram as.dist cutree density dist filter median order.dendrogram quantile reorder sd complete.cases cor t.test p.adjust predict rnorm runif smooth.spline var wilcox.test dnorm ecdf ks.test lm nls pnorm qgamma qnorm rbinom rchisq rgamma rlnorm rnbinom rpois shapiro.test update +#' @importFrom utils flush.console read.table write.table tail read.csv #' @import futile.logger #' @importFrom methods setClass new is #' @importFrom gplots bluered @@ -21,8 +26,27 @@ C_OUTPUT_FORMAT <- c("pdf", "png") #' @importFrom fastcluster hclust #' @import RColorBrewer #' @importFrom Matrix Matrix rowMeans colSums -#' @import coin #' @importFrom dplyr %>% count +#' @import fitdistrplus +#' @import foreach +#' @import doParallel +#' @import future +#' @import coda +#' @import ggplot2 +#' @import argparse +#' @importFrom edgeR estimateDisp +#' @importFrom caTools runmean +#' @importFrom coin oneway_test pvalue +#' @importFrom reshape melt +#' @importFrom rjags jags.model coda.samples +#' @importFrom BiocGenerics counts t +#' @importFrom SummarizedExperiment start colData rowData assays assays<- rowData<- colData<- +#' @importFrom SingleCellExperiment SingleCellExperiment +#' @importFrom tidyr gather +#' @importFrom parallel detectCores +#' @importFrom gridExtra ttheme_default tableGrob gtable_combine marrangeGrob + + NULL diff --git a/R/inferCNV_heatmap.R b/R/inferCNV_heatmap.R index 93ef2fb8..3ad0cfbd 100755 --- a/R/inferCNV_heatmap.R +++ b/R/inferCNV_heatmap.R @@ -29,11 +29,15 @@ 
get_group_color_palette <- function(){ #' @param output_filename Filename to save the figure to. #' @param output_format format for heatmap image file (default: 'png'), options('png', 'pdf', NA) #' If set to NA, will print graphics natively +#' @param png_res Resolution for png output. +#' @param dynamic_resize Factor (>= 0) by which to scale the dynamic resize of the observation +#' heatmap and the overall plot based on how many cells there are. +#' Default is 0, which disables the scaling. Try 1 first if you want to enable. #' @param ref_contig If given, will focus cluster on only genes in this contig. #' @param write_expr_matrix Includes writing a matrix file containing the expression data that is plotted in the heatmap. #' -#' @return -#' No return, void. +#' @return A list of all relevent settings used for the plotting to be able to reuse them in another plot call while keeping consistant plotting settings, most importantly x.range. +#' #' #' @export #' @@ -47,12 +51,14 @@ plot_cnv <- function(infercnv_obj, cluster_by_groups=TRUE, k_obs_groups = 3, contig_cex=1, - x.center=0, - x.range=NA, + x.center=mean(infercnv_obj@expr.data), + x.range="auto", #NA, hclust_method='ward.D', - color_safe_pal=TRUE, + color_safe_pal=FALSE, output_filename="infercnv", output_format="png", #pdf, png, NA + png_res=300, + dynamic_resize=0, ref_contig = NULL, write_expr_matrix=FALSE) { @@ -74,9 +80,9 @@ plot_cnv <- function(infercnv_obj, flog.info(paste("::plot_cnv:Start", sep="")) flog.info(paste("::plot_cnv:Current data dimensions (r,c)=", paste(dim(plot_data), collapse=","), - " Total=", sum(plot_data, na.rm=T), - " Min=", min(plot_data, na.rm=T), - " Max=", max(plot_data, na.rm=T), + " Total=", sum(plot_data, na.rm=TRUE), + " Min=", min(plot_data, na.rm=TRUE), + " Max=", max(plot_data, na.rm=TRUE), ".", sep="")) flog.info(paste("::plot_cnv:Depending on the size of the matrix", " this may take a moment.", @@ -88,7 +94,7 @@ plot_cnv <- function(infercnv_obj, expr_dat_file <- 
paste(out_dir, paste("expr.", output_filename, ".dat", sep=""), sep="/") if (class(plot_data) %in% c("matrix", "data.frame")) { - write.table(plot_data, file=expr_dat_file, quote=F, sep="\t") + write.table(plot_data, file=expr_dat_file, quote=FALSE, sep="\t") } } @@ -106,6 +112,7 @@ plot_cnv <- function(infercnv_obj, delta = max( abs( c(x.center - quantiles[1], quantiles[2] - x.center) ) ) low_threshold = x.center - delta high_threshold = x.center + delta + x.range = c(low_threshold, high_threshold) flog.info(sprintf("plot_cnv(): auto thresholding at: (%f , %f)", low_threshold, high_threshold)) @@ -143,10 +150,13 @@ plot_cnv <- function(infercnv_obj, c(2, 2)) } - # Row separation based on reference - ref_idx <- unlist(infercnv_obj@reference_grouped_cell_indices) - ref_idx = ref_idx[order(ref_idx)] - + ## Row separation based on reference + ref_idx <- NULL + if (has_reference_cells(infercnv_obj)) { + ref_idx <- unlist(infercnv_obj@reference_grouped_cell_indices) + ref_idx = ref_idx[order(ref_idx)] + } + # Column seperation by contig and label axes with only one instance of name contig_tbl <- table(contigs)[unique_contigs] col_sep <- cumsum(contig_tbl) @@ -178,8 +188,20 @@ plot_cnv <- function(infercnv_obj, } # restrict to just the obs indices - obs_annotations_groups <- obs_annotations_groups[-ref_idx] + if (! 
is.null(ref_idx)) { + obs_annotations_groups <- obs_annotations_groups[-ref_idx] + } + if (is.null(dynamic_resize) | dynamic_resize < 0) { + flog.warn(paste("invalid dynamic_resize value: ", dynamic_resize, sep="")) + dynamic_resize = 0 + } + dynamic_extension = 0 + nobs = length(unlist(infercnv_obj@observation_grouped_cell_indices)) + if (nobs > 200) { + dynamic_extension = dynamic_resize * 3.6 * (nobs - 200)/200 + } + grouping_key_coln[1] <- floor(123/(max(nchar(obs_annotations_names)) + 4)) ## 123 is the max width in number of characters, 4 is the space taken by the color box itself and the spacing around it if (grouping_key_coln[1] < 1) { grouping_key_coln[1] <- 1 @@ -203,14 +225,14 @@ plot_cnv <- function(infercnv_obj, pdf(paste(out_dir, paste(output_filename, ".pdf", sep=""), sep="/"), useDingbats=FALSE, width=10, - height=(8.13 + sum(grouping_key_height)), + height=(8.22 + sum(grouping_key_height)) + dynamic_extension, paper="USr") } else if (output_format == "png") { png(paste(out_dir, paste(output_filename, ".png", sep=""), sep="/"), width=10, - height=(8.13 + sum(grouping_key_height)), + height=(8.22 + sum(grouping_key_height)) + dynamic_extension, units="in", - res=600) + res=png_res) } } @@ -255,11 +277,13 @@ plot_cnv <- function(infercnv_obj, # Create file base for plotting output - force_layout <- .plot_observations_layout(grouping_key_height=grouping_key_height) - .plot_cnv_observations(obs_data=obs_data_t, + force_layout <- .plot_observations_layout(grouping_key_height=grouping_key_height, dynamic_extension=dynamic_extension) + .plot_cnv_observations(infercnv_obj=infercnv_obj, + obs_data=obs_data_t, file_base_name=out_dir, output_filename_prefix=output_filename, cluster_contig=ref_contig, + contigs=contigs, contig_colors=ct.colors[contigs], contig_labels=contig_labels, contig_names=contig_names, @@ -298,6 +322,17 @@ plot_cnv <- function(infercnv_obj, if (! 
is.na(output_format)) { dev.off() } + + return(list(cluster_by_groups = cluster_by_groups, + k_obs_groups = k_obs_groups, + contig_cex = contig_cex, + x.center = x.center, + x.range = x.range, + hclust_method = hclust_method, + color_safe_pal = color_safe_pal, + output_format = output_format, + png_res = png_res, + dynamic_resize = dynamic_resize)) } # TODO Tested, test make files so turned off but can turn on and should pass. @@ -331,7 +366,8 @@ plot_cnv <- function(infercnv_obj, #' @noRd #' -.plot_cnv_observations <- function(obs_data, +.plot_cnv_observations <- function(infercnv_obj, + obs_data, col_pal, contig_colors, contig_labels, @@ -343,6 +379,7 @@ plot_cnv <- function(infercnv_obj, cnv_title, cnv_obs_title, contig_lab_size=1, + contigs, cluster_contig=NULL, obs_annotations_groups, obs_annotations_names, @@ -398,9 +435,40 @@ plot_cnv <- function(infercnv_obj, isfirst <- TRUE hcl_obs_annotations_groups <- vector() obs_seps <- c() + sub_obs_seps <- c() # never use at this time? available if we want to add splits in the heatmap for subclusters - - if (cluster_by_groups) { + if (!is.null(infercnv_obj@tumor_subclusters)) { + # for (tumor in obs_annotations_names) { #tumor_subclusters$hc) { + for (i in seq(1, max(obs_annotations_groups))) { + obs_dendrogram[[i]] = as.dendrogram(infercnv_obj@tumor_subclusters$hc[[ obs_annotations_names[i] ]]) + ordered_names <- c(ordered_names, row.names(obs_data[which(obs_annotations_groups == i), hcl_group_indices])[(infercnv_obj@tumor_subclusters$hc[[ obs_annotations_names[i] ]])$order]) + obs_seps <- c(obs_seps, length(ordered_names)) + hcl_obs_annotations_groups <- c(hcl_obs_annotations_groups, rep(i, length(which(obs_annotations_groups == i)))) + + if (isfirst) { + write.tree(as.phylo(infercnv_obj@tumor_subclusters$hc[[ obs_annotations_names[i] ]]), + file=paste(file_base_name, sprintf("%s.observations_dendrogram.txt", output_filename_prefix), sep=.Platform$file.sep)) + isfirst <- FALSE + } + else { + 
write.tree(as.phylo(infercnv_obj@tumor_subclusters$hc[[ obs_annotations_names[i] ]], + file=paste(file_base_name, sprintf("%s.observations_dendrogram.txt", output_filename_prefix), sep=.Platform$file.sep), append=TRUE)) + } + } + if (length(obs_dendrogram) > 1) { + obs_dendrogram <- do.call(merge, obs_dendrogram) + } else { + obs_dendrogram <- obs_dendrogram[[1]] + } + split_groups <- rep(1, dim(obs_data)[1]) + names(split_groups) <- ordered_names + + for(subtumor in infercnv_obj@tumor_subclusters$subclusters[[ obs_annotations_names[i] ]]) { + length(subtumor) + sub_obs_seps <- c(sub_obs_seps, (sub_obs_seps[length(sub_obs_seps)] + length(subtumor))) + } + } + else if (cluster_by_groups) { ## Clustering separately by groups (ie. patients) @@ -414,7 +482,7 @@ plot_cnv <- function(infercnv_obj, next } - data_to_cluster <- obs_data[gene_indices_in_group, hcl_group_indices, drop=F] + data_to_cluster <- obs_data[gene_indices_in_group, hcl_group_indices, drop=FALSE] flog.info(paste("group size being clustered: ", paste(dim(data_to_cluster), collapse=","), sep=" ")) group_obs_hcl <- hclust(dist(data_to_cluster), method=hclust_method) ordered_names <- c(ordered_names, row.names(obs_data[which(obs_annotations_groups == i), hcl_group_indices])[group_obs_hcl$order]) @@ -443,6 +511,7 @@ plot_cnv <- function(infercnv_obj, } split_groups <- rep(1, dim(obs_data)[1]) names(split_groups) <- ordered_names + sub_obs_seps = obs_seps } else { # clustering all groups together @@ -476,12 +545,14 @@ plot_cnv <- function(infercnv_obj, } } obs_seps <- c(obs_seps, length(ordered_names)) + sub_obs_seps = obs_seps } - + + if (length(obs_seps) > 1) { obs_seps <- obs_seps[length(obs_seps)] - obs_seps[(length(obs_seps) - 1):1] } - + # Output HCL group membership. 
# Record locations of seperations @@ -514,6 +585,9 @@ plot_cnv <- function(infercnv_obj, orig_row_names <- row.names(obs_data) row.names(obs_data) <- rep("", nrow(obs_data)) + heatmap_thresholds_file_name <- file.path(file_base_name, sprintf("%s.heatmap_thresholds.txt", output_filename_prefix)) + write.table(breaksList, heatmap_thresholds_file_name, row.names=FALSE, col.names=FALSE) + data_observations <- heatmap.cnv(obs_data, Rowv=obs_dendrogram, Colv=FALSE, @@ -582,7 +656,7 @@ plot_cnv <- function(infercnv_obj, #' @keywords internal #' @noRd #' -.plot_observations_layout <- function(grouping_key_height) +.plot_observations_layout <- function(grouping_key_height, dynamic_extension) { ## Plot observational samples obs_lmat <- c(0, 0, 0, 0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, @@ -608,8 +682,8 @@ plot_cnv <- function(infercnv_obj, obs_lhei <- c(1.125, 2.215, .15, .5, .5, .5, .5, .5, .5, - .5, .5, .5, - 0.03, grouping_key_height[1]+0.04, grouping_key_height[2]+0.04, 0.03) + .5, .5, .5 + dynamic_extension, + 0.1, grouping_key_height[1], grouping_key_height[2], 0.13) return(list(lmat=obs_lmat, lhei=obs_lhei, @@ -727,6 +801,7 @@ plot_cnv <- function(infercnv_obj, # Print controls flog.info("plot_cnv_references:Plotting heatmap.") + data_references <- heatmap.cnv(ref_data, main=NULL, #NA, ylab=reference_ylab, @@ -2158,6 +2233,8 @@ heatmap.cnv <- } else{ title(key.title,cex.main=cex.key,font.main=1) } + + par(mar=op.ori$mar, cex=op.ori$cex, mgp=op.ori$mgp, tcl=op.ori$tcl, usr=op.ori$usr) } else{ if(!force_add){ .plot.text() @@ -2374,3 +2451,52 @@ get.sep <- } +##################################################### +## Custom infercnv functions related to visualization + + +depress_low_signal_midpt_ratio <- function(infercnv_obj, expr_mean, midpt_ratio=0.2, slope=20) { + + expr_bounds = infercnv::get_average_bounds(infercnv_obj) + + delta_mean = max(expr_mean - expr_bounds[1], expr_bounds[2] - expr_mean) + delta_midpt = delta_mean * midpt_ratio + + infercnv_obj <- 
depress_log_signal_midpt_val(infercnv_obj, expr_mean, delta_midpt, slope) + + return(infercnv_obj) + +} + +depress_log_signal_midpt_val <- function(infercnv_obj, expr_mean, delta_midpt, slope=20) { + + + infercnv_obj@expr.data <- .apply_logistic_val_adj(infercnv_obj@expr.data, expr_mean, delta_midpt, slope) + + return(infercnv_obj) +} + + +.apply_logistic_val_adj <- function(vals_matrix, expr_mean, delta_midpt, slope) { + + adjust_value <- function(x) { + newval = x + val = abs(x - expr_mean) + p = .logistic(val, delta_midpt, slope) + if (x > expr_mean) { + newval = expr_mean + p*val + } else if (x < expr_mean) { + newval = expr_mean - p*val + } + return(newval) + } + + + vals_matrix <- apply(vals_matrix, 1:2, adjust_value) + + return(vals_matrix) +} + + + + diff --git a/R/inferCNV_hidden_spike.R b/R/inferCNV_hidden_spike.R new file mode 100644 index 00000000..70e49ddc --- /dev/null +++ b/R/inferCNV_hidden_spike.R @@ -0,0 +1,272 @@ + + +.build_and_add_hspike <- function(infercnv_obj, sim_method=c('meanvar', 'simple', 'splatter'), aggregate_normals=FALSE) { + + sim_method = match.arg(sim_method) + + flog.info("Adding h-spike") + + if (has_reference_cells(infercnv_obj)) { + if (aggregate_normals) { + ## for experimental use / data exploration + normal_cells_idx_lists = list(); + normal_cells_idx_lists[[ 'normalsToUse' ]] = unlist(infercnv_obj@reference_grouped_cell_indices) + } else { + ## the usual method to use + normal_cells_idx_lists = infercnv_obj@reference_grouped_cell_indices + } + } else { + ## the reference-less case: + idx = unlist(infercnv_obj@observation_grouped_cell_indices) + normal_cells_idx_lists = list() + normal_cells_idx_lists[[ 'normalsToUse' ]] = idx + flog.info("-no normals defined, using all observation cells as proxy") + } + + params = list() + + ## build a fake genome with fake chromosomes, alternate between 'normal' and 'variable' regions. 
+ + num_cells = 100 + num_genes_per_chr = 400 + + num_total_genes = nrow(infercnv_obj@expr.data) + + chr_info <- .get_hspike_chr_info(num_genes_per_chr, num_total_genes) + + gene_order = do.call(rbind, lapply(chr_info, function(x) { data.frame(chr=x$name, start=1:x$ngenes, end=1:x$ngenes) })) + num_genes = nrow(gene_order) + rownames(gene_order) <- paste0("gene_", 1:num_genes) + + + genes_means_use_idx = sample(x=1:nrow(infercnv_obj@expr.data), size=num_genes, replace=TRUE) + + + ## do for each group of normal cells + sim.counts.matrix = NULL + reference_grouped_cell_indices = list() + observation_grouped_cell_indices = list() + cell_counter = 0 + for (normal_type in names(normal_cells_idx_lists)) { + + flog.info(sprintf("-hspike modeling of %s", normal_type)) + + normal_cells_idx <- normal_cells_idx_lists[[ normal_type ]] + + ## sample gene info from the normal data + normal_cells_expr = infercnv_obj@expr.data[,normal_cells_idx] + gene_means_orig = rowMeans(normal_cells_expr) + gene_means = gene_means_orig[genes_means_use_idx] + + write.table(gene_means, sprintf("gene_means.before.%s",sub(pattern="[^A-Za-z0-9]", replacement="_", x=normal_type, perl=TRUE)), quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE); + + gene_means[gene_means==0] = 1e-3 # just make small nonzero values + + + names(gene_means) = rownames(gene_order) + + mean_p0_table <- NULL + + ## simulate normals: + if (sim_method == 'splatter') { + params = .estimateSingleCellParamsSplatterScrape(counts=infercnv_obj@count.data[,normal_cells_idx]) + params[["nGenes"]] <- num_genes + params[["nCells"]] <- num_cells + sim.scExpObj = .simulateSingleCellCountsMatrixSplatterScrape(params, use.genes.means=gene_means) + + sim_normal_matrix = counts(sim.scExpObj) + } else if (sim_method == 'simple') { + ## using simple + mean_p0_table <- .get_mean_vs_p0_from_matrix(normal_cells_expr) + sim_normal_matrix <- .get_simulated_cell_matrix(gene_means, + mean_p0_table=mean_p0_table, + num_cells=num_cells, + 
common_dispersion=0.1) + } else if (sim_method == 'meanvar') { + ## using mean,var trend + sim_normal_matrix <- .get_simulated_cell_matrix_using_meanvar_trend(infercnv_obj, gene_means, num_cells, include.dropout=TRUE) + + } + + + spike_norm_name = sprintf("simnorm_cell_%s", normal_type) + colnames(sim_normal_matrix) = paste0(spike_norm_name, 1:num_cells) + rownames(sim_normal_matrix) = rownames(gene_order) + + ## apply spike-in multiplier vec + hspike_gene_means = gene_means + for (info in chr_info) { + chr_name = info$name + cnv = info$cnv + if (cnv != 1) { + gene_idx = which(gene_order$chr == chr_name) + hspike_gene_means[gene_idx] = hspike_gene_means[gene_idx] * cnv + } + } + + + if (sim_method == 'splatter') { + sim_spiked_cnv.scExpObj = .simulateSingleCellCountsMatrixSplatterScrape(params, + use.genes.means=hspike_gene_means) + sim_spiked_cnv_matrix = counts(sim_spiked_cnv.scExpObj) + } else if (sim_method == 'simple') { + ## using simple + sim_spiked_cnv_matrix <- .get_simulated_cell_matrix(hspike_gene_means, + mean_p0_table=mean_p0_table, + num_cells=num_cells, + common_dispersion=0.1) + } else if (sim_method == 'meanvar') { + + sim_spiked_cnv_matrix <- .get_simulated_cell_matrix_using_meanvar_trend(infercnv_obj, hspike_gene_means, num_cells, include.dropout=TRUE) + } + + spike_tumor_name = sprintf("spike_tumor_cell_%s", normal_type) + + colnames(sim_spiked_cnv_matrix) = paste0(spike_tumor_name, 1:num_cells) + rownames(sim_spiked_cnv_matrix) = rownames(gene_order) + + if (is.null(sim.counts.matrix)) { + sim.counts.matrix <- cbind(sim_normal_matrix, sim_spiked_cnv_matrix) + } else { + sim.counts.matrix <- cbind(sim.counts.matrix, sim_normal_matrix, sim_spiked_cnv_matrix) + } + + reference_grouped_cell_indices[[ spike_norm_name ]] = (cell_counter+1):(cell_counter+num_cells) + cell_counter = cell_counter + num_cells + + observation_grouped_cell_indices[[ spike_tumor_name ]] = (cell_counter+1):(cell_counter+num_cells) + cell_counter = cell_counter + num_cells + 
} + + .hspike <- new( Class="infercnv", + expr.data=sim.counts.matrix, + count.data=sim.counts.matrix, + gene_order=gene_order, + reference_grouped_cell_indices=reference_grouped_cell_indices, + observation_grouped_cell_indices=observation_grouped_cell_indices) + + validate_infercnv_obj(.hspike) + + .hspike <- normalize_counts_by_seq_depth(.hspike, median(colSums(normal_cells_expr))) # make same target counts/cell as normals + + infercnv_obj@.hspike <- .hspike + + return(infercnv_obj) +} + + + + +.get_hspike_chr_info <- function(num_genes_each, num_total) { + + num_remaining = num_total - 10*num_genes_each + if (num_remaining < num_genes_each) { + num_remaining = num_genes_each + } + + ## design for fake chr + chr_info = list(list(name='chrA', + cnv=1, + ngenes=num_genes_each), + list(name='chr_0', + cnv=0.01, + ngenes=num_genes_each), + list(name='chr_B', + cnv=1, + ngenes=num_genes_each), + list(name='chr_0pt5', + cnv=0.5, + ngenes=num_genes_each), + list(name='chr_C', + cnv=1, + ngenes=num_genes_each), + list(name='chr_1pt5', + cnv=1.5, + ngenes=num_genes_each), + list(name='chr_D', + cnv=1, + ngenes=num_genes_each), + list(name='chr_2pt0', + cnv=2.0, + ngenes=num_genes_each), + list(name='chr_E', + cnv=1, + ngenes=num_genes_each), + list(name='chr_3pt0', + cnv=3, + ngenes=num_genes_each), + list(name='chr_F', + cnv=1, + ngenes=num_remaining) + ) + + return(chr_info) + +} + + +## +.sim_foreground <- function(infercnv_obj, sim_method) { + + flog.info("## simulating foreground") + + expr.matrix <- infercnv_obj@expr.data + + samples <- c(infercnv_obj@observation_grouped_cell_indices, infercnv_obj@reference_grouped_cell_indices) + + + normal_data <- expr.matrix[, unlist(infercnv_obj@reference_grouped_cell_indices)] + target_total_counts <- median(colSums(normal_data)) + + params <- NULL + if (sim_method == 'splatter') { + params <- .estimateSingleCellParamsSplatterScrape(infercnv_obj@count.data[, unlist(infercnv_obj@reference_grouped_cell_indices)]) + } + + + 
mean_p0_table <- NULL + if (sim_method == 'simple') { + mean_p0_table <- .get_mean_vs_p0_from_matrix(normal_data) + } + + for (sample_name in names(samples)) { + + cell_idx = samples[[ sample_name ]] + + sample_expr_data = expr.matrix[, cell_idx] + gene_means = rowMeans(sample_expr_data) + gene_means[gene_means==0] <- 1e-3 #avoid zeros, breaks splatter sim + + num_cells = ncol(sample_expr_data) + + ## sim the tumor matrix + if (sim_method == 'simple') { + + sim_matrix <- .get_simulated_cell_matrix(gene_means, + mean_p0_table=mean_p0_table, + num_cells=num_cells, + common_dispersion=0.1) + } else if (sim_method == 'splatter') { + params[['nCells']] <- num_cells + sim_matrix <- .simulateSingleCellCountsMatrixSplatterScrape(params, gene_means) + sim_matrix <- counts(sim_matrix) + } else if (sim_method == 'meanvar') { + sim_matrix <- .get_simulated_cell_matrix_using_meanvar_trend(infercnv_obj, + gene_means, + num_cells, + include.dropout=TRUE) + } else { + stop(sprintf("not recognizing --sim_method: %s", args$sim_method)) + } + + expr.matrix[, cell_idx] <- sim_matrix + + } + + infercnv_obj@expr.data <- expr.matrix + + infercnv_obj <- normalize_counts_by_seq_depth(infercnv_obj, target_total_counts) + + return(infercnv_obj) +} + diff --git a/R/inferCNV_i3HMM.R b/R/inferCNV_i3HMM.R new file mode 100644 index 00000000..310372fb --- /dev/null +++ b/R/inferCNV_i3HMM.R @@ -0,0 +1,480 @@ + +#' @title .i3HMM_get_sd_trend_by_num_cells_fit +#' +#' @description Determines the characteristics for the tumor cell residual intensities, including +#' fitting the variance in mean intensity as a function of number of cells sampled. +#' +#' @param infercnv_obj infercnv object +#' +#' @param i3_p_val the p-value to use for defining the position of means for the alternate amp/del distributions. +#' +#' @param plot boolean, set to TRUE to plot the mean/var fit. 
+#' +#' @return normal_sd_trend list +#' +#' @export + + +.i3HMM_get_sd_trend_by_num_cells_fit <- function(infercnv_obj, i3_p_val=0.05, plot=FALSE) { + + + tumor_samples = infercnv_obj@observation_grouped_cell_indices + + tumor_expr_vals <- infercnv_obj@expr.data[,unlist(tumor_samples)] + + mu = mean(tumor_expr_vals) + sigma = sd(tumor_expr_vals) + nrounds = 100 + sds = c() + ngenes = nrow(tumor_expr_vals) + + num_tumor_samples = length(tumor_samples) + flog.info(".i3HMM_get_sd_trend_by_num_cells_fit:: -got ", num_tumor_samples, " samples") + + for (ncells in seq_len(100)) { + means = c() + + for(i in 1:nrounds) { + ## pick a random gene + rand.gene = sample(1:ngenes, size=1) + + ## pick a random normal cell type + rand.sample = sample(1:num_tumor_samples, size=1) + #message("rand.sample: " , rand.sample) + + vals = sample(infercnv_obj@expr.data[rand.gene, tumor_samples[[rand.sample]] ], size=ncells, replace=TRUE) + m_val = mean(vals) + means = c(means, m_val) + } + sds = c(sds, sd(means)) + } + + ## fit linear model + num_cells = 1:length(sds) + fit = lm(log(sds) ~ log(num_cells)) #note, hbadger does something similar, but not for the hmm cnv state levels + + if (plot) { + plot(log(num_cells), log(sds), main='log(sd) vs. log(num_cells)') + } + + ## get distribution position according to p_val in a qnorm + mean_delta = determine_mean_delta_via_Z(sigma, p=i3_p_val) + message("mean_delta: ", mean_delta, ", at sigma: ", sigma, ", and pval: ", i3_p_val) + + ## do this HBadger style in case that option is to be used. 
+ KS_delta = get_HoneyBADGER_setGexpDev(gexp.sd=sigma, alpha=i3_p_val) + message("KS_delta: ", KS_delta, ", at sigma: ", sigma, ", and pval: ", i3_p_val) + + + sd_trend = list(mu=mu, + sigma=sigma, + fit=fit, + mean_delta=mean_delta, + KS_delta=KS_delta) + + return(sd_trend) + +} + + +#' @title .i3HMM_get_HMM +#' +#' @description get the i3 HMM parameterization +#' +#' @param sd_trend the normal sd trend info list +#' +#' @param num_cells number of cells in a subcluster +#' +#' @param t alt state transition probability +#' +#' @param i3_p_val p-value used to define position of mean for amp/del dists (default: 0.05) +#' +#' @param use_KS boolean : use the KS statistic (HBadger style) for determining the position of the mean for the amp/del dists. +#' +#' @return HMM_info list +#' +#' @noRd + +.i3HMM_get_HMM <- function(sd_trend, num_cells, t, i3_p_val=0.05, use_KS) { + + ## Here we do something very similar to HoneyBadger + ## which is to estimate the mean/var for the CNV states + ## based on the KS statistic and the expression values + ## for the normal cells. + + + ## states: 0.5, 1, 1 .5 + state_transitions = matrix( c(1-5*t, t, t, + t, 1-5*t, t, + t, t, 1-5*t), + byrow=TRUE, + nrow=3) + + delta=c(t, 1-5*t, t) # more likely normal, + + mu = sd_trend$mu # normal cell mean + + + #flog.info(sprintf("-.i3HMM_get_HMM, mean_delta=%g, use_KS=%s", mean_delta, use_KS)) + + if (num_cells == 1) { + sigma = sd_trend$sigma + mean_delta = ifelse(use_KS, sd_trend$KS_delta, sd_trend$mean_delta) + } else { + ## use the var vs. 
num cells trend + sigma <- exp(predict(sd_trend$fit, + newdata=data.frame(num_cells=num_cells))[[1]]) + + + mean_delta = ifelse(use_KS, + get_HoneyBADGER_setGexpDev(gexp.sd=sd_trend$sigma, alpha=i3_p_val, k_cells=num_cells), + determine_mean_delta_via_Z(sigma, p=i3_p_val) ) + } + + + state_emission_params = list(mean=c( + mu - mean_delta, # state 0.5 = deletion + mu, # state 1 = neutral + mu + mean_delta), # state 1.5 = amplification + + sd=c(sigma, + sigma, + sigma) # shared variance as in HB + ) + + HMM_info = list(state_transitions=state_transitions, + delta=delta, + state_emission_params=state_emission_params) + + + #print(HMM_info) + + return(HMM_info) +} + + +#' @title i3HMM_predict_CNV_via_HMM_on_indiv_cells +#' +#' @description use the i3 HMM for predicting CNV at the level of individual cells +#' +#' @param infercnv_obj infercnv object +#' +#' @param i3_p_val p-value used to determine mean for amp/del distributions +#' +#' @param sd_trend (optional) by default, computed automatically based on infercnv_obj, i3_p_val +#' +#' @param t alt state transition probability (default: 1e-6) +#' +#' @param use_KS boolean : use the KS test statistic to determine mean for amp/del dist HBadger style (default: TRUE) +#' +#' @return infercnv_obj where infercnv_obj@expr.data contains state assignments. 
+#' +#' @export +#' + + +i3HMM_predict_CNV_via_HMM_on_indiv_cells <- function(infercnv_obj, + i3_p_val=0.05, + sd_trend=.i3HMM_get_sd_trend_by_num_cells_fit(infercnv_obj, i3_p_val), + t=1e-6, + use_KS=TRUE) { + + flog.info("predict_CNV_via_HMM_on_indiv_cells()") + + chrs = unique(infercnv_obj@gene_order$chr) + + expr.data = infercnv_obj@expr.data + gene_order = infercnv_obj@gene_order + hmm.data = expr.data + hmm.data[,] = -1 #init to invalid state + + HMM_info <- .i3HMM_get_HMM(sd_trend, num_cells = 1, t=t, i3_p_val=i3_p_val, use_KS=use_KS) + + message(HMM_info) + + ## run through each chr separately + lapply(chrs, function(chr) { + chr_gene_idx = which(gene_order$chr == chr) + + ## run through each cell for this chromosome: + lapply(seq_len(ncol(expr.data)), function(cell_idx) { + + gene_expr_vals = as.vector(expr.data[chr_gene_idx,cell_idx]) + + hmm <- HiddenMarkov::dthmm(x=gene_expr_vals, + Pi=HMM_info[['state_transitions']], + delta=HMM_info[['delta']], + distn="norm", + pm=HMM_info[['state_emission_params']]) + + + hmm_trace <- Viterbi.dthmm.adj(hmm) + + hmm.data[chr_gene_idx,cell_idx] <<- hmm_trace + }) + }) + + infercnv_obj@expr.data <- hmm.data + + return(infercnv_obj) + +} + + + +#' @title i3HMM_predict_CNV_via_HMM_on_tumor_subclusters +#' +#' @description use the i3 HMM for predicting CNV at the level of tumor subclusters +#' +#' @param infercnv_obj infercnv object +#' +#' @param i3_p_val p-value used to determine mean for amp/del distributions +#' +#' @param sd_trend (optional) by default, computed automatically based on infercnv_obj, i3_p_val +#' +#' @param t alt state transition probability (default: 1e-6) +#' +#' @param use_KS boolean : use the KS test statistic to determine mean for amp/del dist HBadger style (default: TRUE) +#' +#' @return infercnv_obj where infercnv_obj@expr.data contains state assignments. 
+#' +#' @export +#' + +i3HMM_predict_CNV_via_HMM_on_tumor_subclusters <- function(infercnv_obj, + i3_p_val=0.05, + sd_trend=.i3HMM_get_sd_trend_by_num_cells_fit(infercnv_obj, i3_p_val), + t=1e-6, + use_KS=TRUE + ) { + + + flog.info(sprintf("i3HMM_predict_CNV_via_HMM_on_tumor_subclusters(i3_p_val=%g, use_KS=%s)", i3_p_val, use_KS)) + + if (is.null(infercnv_obj@tumor_subclusters)) { + flog.warn("No subclusters defined, so instead running on whole samples") + return(i3HMM_predict_CNV_via_HMM_on_whole_tumor_samples(infercnv_obj, i3_p_val, sd_trend, t, use_KS)); + } + + chrs = unique(infercnv_obj@gene_order$chr) + + expr.data = infercnv_obj@expr.data + gene_order = infercnv_obj@gene_order + hmm.data = expr.data + hmm.data[,] = -1 #init to invalid state + + tumor_subclusters <- unlist(infercnv_obj@tumor_subclusters[["subclusters"]], recursive=FALSE) + + ## add the normals, so they get predictions too: + tumor_subclusters <- c(tumor_subclusters, infercnv_obj@reference_grouped_cell_indices) + + ## run through each chr separately + lapply(chrs, function(chr) { + chr_gene_idx = which(gene_order$chr == chr) + + ## run through each cell for this chromosome: + lapply(tumor_subclusters, function(tumor_subcluster_cells_idx) { + + gene_expr_vals = rowMeans(expr.data[chr_gene_idx,tumor_subcluster_cells_idx,drop=FALSE]) + + num_cells = length(tumor_subcluster_cells_idx) + + HMM_info <- .i3HMM_get_HMM(sd_trend, num_cells=num_cells, t=t, i3_p_val=i3_p_val, use_KS=use_KS) + + hmm <- HiddenMarkov::dthmm(gene_expr_vals, + HMM_info[['state_transitions']], + HMM_info[['delta']], + "norm", + HMM_info[['state_emission_params']]) + + ## hmm_trace <- HiddenMarkov::Viterbi(hmm) + hmm_trace <- Viterbi.dthmm.adj(hmm) + + hmm.data[chr_gene_idx,tumor_subcluster_cells_idx] <<- hmm_trace + }) + }) + + infercnv_obj@expr.data <- hmm.data + + flog.info("-done predicting CNV based on initial tumor subclusters") + + return(infercnv_obj) + +} + + +#' @title i3HMM_predict_CNV_via_HMM_on_whole_tumor_samples 
+#' +#' @description use the i3 HMM for predicting CNV at the level of whole tumor samples +#' +#' @param infercnv_obj infercnv object +#' +#' @param i3_p_val p-value used to determine mean for amp/del distributions +#' +#' @param sd_trend (optional) by default, computed automatically based on infercnv_obj, i3_p_val +#' +#' @param t alt state transition probability (default: 1e-6) +#' +#' @param use_KS boolean : use the KS test statistic to determine mean for amp/del dist HBadger style (default: TRUE) +#' +#' @return infercnv_obj where infercnv_obj@expr.data contains state assignments. +#' +#' @export +#' + + +i3HMM_predict_CNV_via_HMM_on_whole_tumor_samples <- function(infercnv_obj, + i3_p_val=0.05, + sd_trend=.i3HMM_get_sd_trend_by_num_cells_fit(infercnv_obj, i3_p_val), + t=1e-6, + use_KS=TRUE + ) { + + + flog.info("predict_CNV_via_HMM_on_whole_tumor_samples") + + + chrs = unique(infercnv_obj@gene_order$chr) + + expr.data = infercnv_obj@expr.data + gene_order = infercnv_obj@gene_order + hmm.data = expr.data + hmm.data[,] = -1 #init to invalid state + + ## add the normals, so they get predictions too: + tumor_samples <- c(infercnv_obj@observation_grouped_cell_indices, infercnv_obj@reference_grouped_cell_indices) + + ## run through each chr separately + lapply(chrs, function(chr) { + chr_gene_idx = which(gene_order$chr == chr) + + ## run through each cell for this chromosome: + lapply(tumor_samples, function(tumor_sample_cells_idx) { + + gene_expr_vals = rowMeans(expr.data[chr_gene_idx,tumor_sample_cells_idx,drop=FALSE]) + + num_cells = length(tumor_sample_cells_idx) + + HMM_info <- .i3HMM_get_HMM(sd_trend, num_cells=num_cells, t=t, i3_p_val=i3_p_val, use_KS=use_KS) + + hmm <- HiddenMarkov::dthmm(gene_expr_vals, + HMM_info[['state_transitions']], + HMM_info[['delta']], + "norm", + HMM_info[['state_emission_params']]) + + hmm_trace <- Viterbi.dthmm.adj(hmm) + + hmm.data[chr_gene_idx,tumor_sample_cells_idx] <<- hmm_trace + }) + }) + + infercnv_obj@expr.data <- 
hmm.data + + flog.info("-done predicting CNV based on initial tumor subclusters") + + return(infercnv_obj) + +} + + +#' @title i3HMM_assign_HMM_states_to_proxy_expr_vals +#' +#' @description replace i3 HMM state predictions with their represented CNV levels +#' +#' @param infercnv_obj infercnv object +#' +#' @return infercnv_obj +#' +#' @export + + +i3HMM_assign_HMM_states_to_proxy_expr_vals <- function(infercnv_obj) { + + expr.data = infercnv_obj@expr.data + + expr.data[expr.data == 1] <- 0.5 + expr.data[expr.data == 2] <- 1 + expr.data[expr.data == 3] <- 1.5 + + infercnv_obj@expr.data <- expr.data + + return(infercnv_obj) + +} + + +#' @title determine_mean_delta_via_Z +#' +#' @description determine means for amp/del distributions requiring that they cross the +#' given distribution based on sigma centered at zero and at the given p value +#' +#' @param sigma standard deviation for a Normal distribution +#' +#' @param p the p-value at which the distributions should intersect +#' +#' @return delta_for_alt_mean +#' +#' @export + +determine_mean_delta_via_Z <- function(sigma, p) { + + ## want tails of the distribution to minimially overlap at the p-value + + delta = abs(qnorm(p=p, mean=0, sd=sigma)) + + flog.info(sprintf("determine mean delta (sigma: %g, p=%g) -> %g", sigma, p, delta)) + + delta_for_alt_mean = 2 * delta + + return(delta_for_alt_mean) + +} + + +#' @title get_HoneyBADGER_setGexpDev +#' +#' @description This method is adapted from HoneyBADGER's setGexpDev method +#' Essentially, using the KS test to determine where to set the +#' amp/del means for the distributions. +#' +#' +#' @param gexp.sd standard deviation for all genes +#' +#' @param alpha the targeted p-value +#' +#' @param k_cells number of cells to sample from the distribution +#' +#' @param n_iter number random iterations for sampling from the distribution (default: 100) +#' +#' @param plot boolean, set to True to plot. 
+#' +#' @return optim.dev +#' +#' @noRd + +get_HoneyBADGER_setGexpDev <- function(gexp.sd, alpha, k_cells=2, n_iter=100, plot=FALSE) { + + if (k_cells < 2) { + flog.warn("get_HoneyBADGER_setGexpDev:: k_cells must be at least 2, setting to 2") + k_cells = 2 + } + + devs <- seq(0, gexp.sd, gexp.sd/10) + pvs <- unlist(lapply(devs, function(dev) { + mean(unlist(lapply(seq_len(n_iter), function(i) { + pv <- ks.test(rnorm(k_cells, 0, gexp.sd), rnorm(k_cells, dev, gexp.sd)) + pv$p.value + }))) + })) + if(plot) { + plot(pvs, devs, xlab="p-value", ylab="deviation", xlim=c(0,1)) + } + fit <- lm(devs ~ pvs) + + optim.dev <- predict(fit, newdata=data.frame(pvs=alpha)) + + #flog.info(sprintf("-get_HoneyBADGER_setGexpDev(sigma=%g, alpha=%g, k_cells=%g) = %g", gexp.sd, alpha, k_cells, optim.dev)) + + return(optim.dev) +} diff --git a/R/inferCNV_mask_non_DE.R b/R/inferCNV_mask_non_DE.R index 28976ded..bc081516 100644 --- a/R/inferCNV_mask_non_DE.R +++ b/R/inferCNV_mask_non_DE.R @@ -1,58 +1,6 @@ #' @include inferCNV.R NULL - - -#' @title .mask_DE_genes() -#' -#' @description private function that does the actual masking of expression values in the matrix -#' according to the specified mask value -#' -#' @param infercnv_obj infercnv_object -#' -#' @param all_DE_results DE results list with structure: -#' all_DE_results[[ tumor_type ]] = list(tumor=tumor_type, de_genes=c(geneA, geneB, ...)) -#' -#' @param mask_val float value to assign to genes that are in the mask set (complement of the DE gene set) -#' -#' @return infercnv_obj -#' -#' @keywords internal -#' @noRd -#' - -.mask_DE_genes <- function(infercnv_obj, all_DE_results, mask_val) { - - - all_DE_genes_matrix = matrix(data=FALSE, nrow=nrow(infercnv_obj@expr.data), - ncol=ncol(infercnv_obj@expr.data), - dimnames = list(rownames(infercnv_obj@expr.data), - colnames(infercnv_obj@expr.data) ) ) - - - - all_DE_genes_matrix[,] = FALSE - - ## turn on all normal genes - all_DE_genes_matrix[, 
unlist(infercnv_obj@reference_grouped_cell_indices)] = TRUE - - for (DE_results in all_DE_results) { - tumor_type = DE_results$tumor - genes = DE_results$de_genes - - - all_DE_genes_matrix[rownames(all_DE_genes_matrix) %in% genes, - infercnv_obj@observation_grouped_cell_indices[[ tumor_type ]] ] = TRUE - - } - - infercnv_obj@expr.data[ ! all_DE_genes_matrix ] = mask_val - - return(infercnv_obj) - - -} - #' @title mask_non_DE_genes_basic() #' #' @description Mask gene expression in infercnv_obj based on expression values that are found @@ -69,6 +17,8 @@ NULL #' #' @param center_val value to assign to those genes that are not found to be statistically DE. #' +#' @param require_DE_all_normals mask gene if found significantly DE in each normal comparison (default="any") options("any", "most", "all") +#' #' @return infercnv_obj #' #' @export @@ -77,20 +27,109 @@ NULL mask_non_DE_genes_basic <- function(infercnv_obj, p_val_thresh = 0.05, test.use="wilcoxon", - center_val=mean(infercnv_obj@expr.data) ) { + center_val=mean(infercnv_obj@expr.data), + require_DE_all_normals="any" + ) { + tumor_groupings = infercnv_obj@observation_grouped_cell_indices + all_DE_results = get_DE_genes_basic(infercnv_obj, p_val_thresh=p_val_thresh, test.use=test.use) + #save('all_DE_results', file='all_DE_results.obj') + infercnv_obj <- .mask_DE_genes(infercnv_obj, - all_DE_results, - mask_val=center_val) + all_DE_results=all_DE_results, + mask_val=center_val, + require_DE_all_normals=require_DE_all_normals) + + return(infercnv_obj) +} + + + +#' @title .mask_DE_genes() +#' +#' @description private function that does the actual masking of expression values in the matrix +#' according to the specified mask value +#' +#' @param infercnv_obj infercnv_object +#' +#' @param all_DE_results DE results list with structure: +#' all_DE_results[[ tumor_type ]] = list(tumor=tumor_type, de_genes=c(geneA, geneB, ...)) +#' +#' @param mask_val float value to assign to genes that are in the mask set (complement 
of the DE gene set) +#' +#' @param min_cluster_size_mask clusters smaller than this size are automatically retained (unmasked). default=5 +#' +#' @return infercnv_obj +#' +#' @keywords internal +#' @noRd +#' + +.mask_DE_genes <- function(infercnv_obj, + all_DE_results, + mask_val, + require_DE_all_normals, # any, most, all + min_cluster_size_mask=5) { + + all_DE_genes_matrix = matrix(data=0, nrow=nrow(infercnv_obj@expr.data), + ncol=ncol(infercnv_obj@expr.data), + dimnames = list(rownames(infercnv_obj@expr.data), + colnames(infercnv_obj@expr.data) ) ) + + + num_normal_types = length(names(infercnv_obj@reference_grouped_cell_indices)) + + ## turn on all normal genes + all_DE_genes_matrix[, unlist(infercnv_obj@reference_grouped_cell_indices)] = num_normal_types + + ## retain small clusters that are unlikely to show up as DE + #for (tumor_type in names(tumor_groupings)) { + # tumor_idx = tumor_groupings[[tumor_type]] + # if (length(tumor_idx) < min_cluster_size_mask) { + # all_DE_genes_matrix[, tumor_idx] = num_normal_types + # } + #} + for (DE_results in all_DE_results) { + if (length(DE_results$tumor_indices) < min_cluster_size_mask) { + all_DE_genes_matrix[, DE_results$tumor_indices] = num_normal_types + } + } + + for (DE_results in all_DE_results) { + # tumor_type = DE_results$tumor + genes = DE_results$de_genes + + gene_idx = rownames(all_DE_genes_matrix) %in% genes + # cell_idx = tumor_groupings[[ tumor_type ]] + cell_idx = DE_results$tumor_indices + + if (length(cell_idx) >= min_cluster_size_mask) { + all_DE_genes_matrix[gene_idx, cell_idx] = all_DE_genes_matrix[gene_idx, cell_idx] + 1 + } + } + + if (require_DE_all_normals == "all") { + ## must be found in each of the tumor vs (normal_1, normal_2, ..., normal_N) DE comparisons to not be masked. 
+ infercnv_obj@expr.data[ all_DE_genes_matrix != num_normal_types ] = mask_val + } else if ( require_DE_all_normals == "most") { + infercnv_obj@expr.data[ all_DE_genes_matrix < num_normal_types/2 ] = mask_val + } else if ( require_DE_all_normals == "any") { + ## masking if not found DE in any comparison + infercnv_obj@expr.data[ all_DE_genes_matrix == 0 ] = mask_val + } else { + stop(sprintf("Error, not recognizing require_DE_all_normals=%s", require_DE_all_normals)) + } + return(infercnv_obj) + } @@ -118,11 +157,7 @@ get_DE_genes_basic <- function(infercnv_obj, p_val_thresh = 0.05, test.use="wilcoxon" # other options: (wilcoxon, t, perm) ) { - - ## Find DE genes by comparing the mutant types to normal types - normal_types = names(infercnv_obj@reference_grouped_cell_indices) - tumor_types = names(infercnv_obj@observation_grouped_cell_indices) - + all_DE_results = list() statfxns = list() @@ -155,7 +190,11 @@ get_DE_genes_basic <- function(infercnv_obj, vals1 = x[idx1] vals2 = x[idx2] - + + ## force break ties by adding random noise + vals1 = vals1 + rnorm(n=length(vals1), mean=0.0001, sd=0.0001) + vals2 = vals2 + rnorm(n=length(vals2), mean=0.0001, sd=0.0001) + w = wilcox.test(vals1, vals2) return(w$p.value) @@ -163,36 +202,53 @@ get_DE_genes_basic <- function(infercnv_obj, statfxn = statfxns[[ test.use ]] - - ## turn on only DE genes in tumors - for (tumor_type in tumor_types) { - tumor_indices = infercnv_obj@observation_grouped_cell_indices[[ tumor_type ]] + ## Find DE genes by comparing the mutant types to normal types + normal_types = names(infercnv_obj@reference_grouped_cell_indices) + + ## turn on only DE genes in tumors + tumor_groupings = infercnv_obj@observation_grouped_cell_indices + for (tumor_type in names(tumor_groupings)) { + + indices = infercnv_obj@tumor_subclusters[["subclusters"]][[ tumor_type ]] + if(is.list(indices)) { + tumor_indices_list = indices + } + else { # is.vector(indices) + tumor_indices_list = list(indices) + } - for (normal_type in 
normal_types) { - flog.info(sprintf("Finding DE genes between %s and %s", tumor_type, normal_type)) - - normal_indices = infercnv_obj@reference_grouped_cell_indices[[ normal_type ]] - - - pvals = apply(infercnv_obj@expr.data, 1, statfxn, idx1=normal_indices, idx2=tumor_indices) - pvals = unlist(pvals) - pvals = p.adjust(pvals, method="BH") - - names(pvals) = rownames(infercnv_obj@expr.data) - - genes = names(pvals)[pvals 0) { + logm = log(m+1) + pred_log_var = predict(mean_var_spline, logm)$y + + var = max(exp(pred_log_var)-1, 0) + + val = round(max(rnorm(n=1, mean=m, sd=sqrt(var)), 0)) + + if ( (! is.null(dropout_logistic_params)) & val > 0) { + + dropout_prob <- predict(dropout_logistic_params$spline, log(val))$y[1] + + if (runif(1) <= dropout_prob) { + ## a drop-out + val = 0 + } + } + } + + return(val) +} + + +.sim_expr_val_mean_var_no_dropout <- function(m, mean_var_spline) { + + val = 0 + if (m > 0) { + logm = log(m+1) + pred_log_var = predict(mean_var_spline, logm)$y + + var = max(exp(pred_log_var)-1, 0) + + val = round(max(rnorm(n=1, mean=m, sd=sqrt(var)), 0)) + + } + + return(val) +} + + +.apply_dropout <- function(counts.matrix, dropout_logistic_params) { + + + + counts.matrix <- apply(counts.matrix, 1, function(x) { + + mean.val = mean(x) + dropout_prob <- predict(dropout_logistic_params$spline, log(mean.val))$y[1] + + nzeros = sum(x==0) + ntotal = length(x) + nremaining = ntotal - nzeros + + # padj = ( (pzero*total) - (current_nzero) ) / remaining + + padj = ( (dropout_prob * ntotal) - (nzeros) ) / nremaining + padj = max(padj, 0) + + flog.debug(sprintf("mean.val: %g, dropout_prob: %g, adj_dropout_prob: %g", + mean.val, + dropout_prob, + padj)) + + x.adj = sapply(x, function(y) { + if(runif(1) <= padj) { + return(0) + } else { + return(y) + } + + } ) + + x.adj + + }) + + return(t(counts.matrix)) + + +} + + + +##' .get_mean_var_table() +##' +##' Computes the gene mean/variance table based on all defined cell groupings (reference and observations) +##' 
+##' @param infercnv_obj An infercnv object populated with raw count data +##' +##' @return data.frame with 3 columns: group_name, mean, variance +##' +##' +##' @keywords internal +##' @noRd +##' + +.get_mean_var_table <- function(infercnv_obj) { + + group_indices = c(infercnv_obj@observation_grouped_cell_indices, infercnv_obj@reference_grouped_cell_indices) + + mean_variance_table <- .get_mean_var_given_matrix(infercnv_obj@expr.data, group_indices) + + return(mean_variance_table) + +} + + +.get_mean_var_given_matrix <- function(expr.matrix, cell_cluster_groupings=NULL) { + + if (is.null(cell_cluster_groupings)) { + ## use all cells + cell_cluster_groupings = list(allcells=seq(ncol(expr.matrix))) + } + + mean_var_table <- NULL + + for (group_name in names(cell_cluster_groupings)) { + + expr.data = expr.matrix[, cell_cluster_groupings[[ group_name ]] ] + m = rowMeans(expr.data) + v = apply(expr.data, 1, var) + if (is.null(mean_var_table)) { + mean_var_table = data.frame(g=group_name, m=m, v=v) + } else { + mean_var_table = rbind(mean_var_table, data.frame(g=group_name, m=m, v=v)) + } + } + + return(mean_var_table) +} + +##' .get_spike_in_average_bounds() +##' +##' return mean bounds for expression of all cells in the spike-in +##' +##' @param infercnv_obj An infercnv object populated with raw count data +##' +##' @return c(left_bound, right_bound) +##' +##' @keywords internal +##' @noRd +##' + diff --git a/R/inferCNV_ops.R b/R/inferCNV_ops.R index d841e211..6f3411a1 100755 --- a/R/inferCNV_ops.R +++ b/R/inferCNV_ops.R @@ -1,79 +1,173 @@ #' Function doing the actual analysis before calling the plotting functions. #' -#' @title run() : Invokes a routine inferCNV analysis to Infer CNV changes given a matrix of RNASeq counts. +#' @title run() : Invokes a routine inferCNV analysis to Infer CNV changes given a matrix of RNASeq counts. 
#' #' @param infercnv_obj An infercnv object populated with raw count data -#' +#' #' @param cutoff Cut-off for the min average read counts per gene among reference cells. (default: 1) #' -#' @param min_cells_per_gene minimum number of reference cells requiring expression measurements to include the corresponding gene. -#' default: 3 -#' -#' @param out_dir path to directory to deposit outputs (default: '.') +#' @param min_cells_per_gene minimum number of reference cells requiring expression measurements to include the corresponding gene. +#' default: 3 #' -#' @param normalize_factor scaling factor for total sum of counts (default: NA, in which case -#' will be set = 10^round(log10(mean(colSums))), typically setting to 1e5 +#' @param out_dir path to directory to deposit outputs (default: '.') #' +#' ## Smoothing params #' @param window_length Length of the window for the moving average #' (smoothing). Should be an odd integer. (default: 101)#' #' +#' @param smooth_method Method to use for smoothing: c(runmeans,pyramidinal) default: pyramidinal +#' +#' ##### +#' #' @param num_ref_groups The number of reference groups or a list of #' indices for each group of reference indices in #' relation to reference_obs. (default: NULL) #' +#' @param ref_subtract_use_mean_bounds Determine means separately for each ref group, then remove intensities within bounds of means (default: TRUE) +#' Otherwise, uses mean of the means across groups. +#' +#' ############################# +#' +#' @param cluster_by_groups If observations are defined according to groups (ie. patients), each group +#' of cells will be clustered separately. (default=FALSE, instead will use k_obs_groups setting) +#' +#' +#' @param k_obs_groups Number of groups in which to break the observations. (default: 1) +#' +#' +#' +#' @param hclust_method Method used for hierarchical clustering of cells. Valid choices are: +#' "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid". 
+#' default("ward.D2") +#' #' @param max_centered_threshold The maximum value a value can have after #' centering. Also sets a lower bound of -#' -1 * this value. +#' -1 * this value. (default: 3), +#' can set to a numeric value or "auto" to bound by the mean bounds across cells. +#' Set to NA to turn off. +#' +#' @param scale_data perform Z-scaling of logtransformed data (default: FALSE). This may be turned on if you have +#' very different kinds of data for your normal and tumor samples. For example, you need to use GTEx +#' representative normal expression profiles rather than being able to leverage normal single cell data +#' that goes with your experiment. +#' +#' ######################################################################### +#' ## Downstream Analyses (HMM or non-DE-masking) based on tumor subclusters +#' +#' @param HMM when set to True, runs HMM to predict CNV level (default: FALSE) +#' +#' @param HMM_transition_prob transition probability in HMM (default: 1e-6) +#' +#' +#' @param HMM_report_by cell, consensus, subcluster (default: subcluster) Note, reporting is performed entirely separately from the HMM prediction. So, you can predict on subclusters, but get per-cell level reporting (more voluminous output). +#' +#' +#' @param HMM_type HMM model type. Options: (i6 or i3): +#' i6: infercnv 6-state model (0, 0.5, 1, 1.5, 2, >2) where state emissions are calibrated based on simulated CNV levels. +#' i3: infercnv 3-state model (del, neutral, amp) configured based on normal cells and HMM_i3_pval +#' +#' @param HMM_i3_pval p-value for HMM i3 state overlap (default: 0.05) +#' +#' +#' @param HMM_i3_use_KS boolean: use the KS test statistic to estimate mean of amp/del distributions (ala HoneyBadger). (default=TRUE) +#' +#' +#' ## Filtering low-conf HMM preds via BayesNet P(Normal) +#' +#' @param BayesMaxPNormal maximum P(Normal) allowed for a CNV prediction according to BayesNet. 
(default=0.5, note zero turns it off) +#' +#' ###################### +#' ## Tumor subclustering +#' +#' @param analysis_mode options(samples|subclusters|cells), Grouping level for image filtering or HMM predictions. +#' default: samples (fastest, but subclusters is ideal) +#' +#' @param tumor_subcluster_partition_method method for defining tumor subclusters. Options('random_trees', 'qnorm') +#' random_trees: (default) slow but best. Uses permutation statistics w/ tree construction. +#' qnorm: defines tree height based on the quantile defined by the tumor_subcluster_pval +#' +#' @param tumor_subcluster_pval max p-value for defining a significant tumor subcluster (default: 0.1) +#' +#' +#' +#' ############################# +#' ## de-noising parameters #### +#' +#' @param denoise If True, turns on denoising according to options below #' #' @param noise_filter Values +- from the reference cell mean will be set to zero (whitening effect) #' default(NA, instead will use sd_amplifier below. #' #' @param sd_amplifier Noise is defined as mean(reference_cells) +- sdev(reference_cells) * sd_amplifier #' default: 1.0 -#' -#' @param cluster_by_groups If observations are defined according to groups (ie. patients), each group -#' of cells will be clustered separately. (default=FALSE, instead will use k_obs_groups setting) -#' #' -#' @param k_obs_groups Number of groups in which to break the observations. (default: 1) +#' @param noise_logistic use the noise_filter or sd_amplifier based threshold (whichever is invoked) as the midpoint in a +#' logistic model for downscaling values close to the mean. (default: FALSE) +#' +#' +#' ################## +#' ## Outlier pruning #' #' @param outlier_method_bound Method to use for bounding outlier values. (default: "average_bound") #' Will preferentially use outlier_lower_bounda and outlier_upper_bound if set. -#' @param outlier_lower_bound Outliers below this lower bound will be set to this value. 
+#' @param outlier_lower_bound Outliers below this lower bound will be set to this value. #' @param outlier_upper_bound Outliers above this upper bound will be set to this value. #' #' -#' @param hclust_method Method used for hierarchical clustering of cells. Valid choices are: -#' "ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid". +#' ########################## +#' ## Misc options +#' +#' @param final_scale_limits The scale limits for the final heatmap output by the run() method. Default "auto". Alt, c(low,high) #' -#' @param anscombe_normalize Perform anscombe normalization on normalized counts before log transformation. -#' -#' @param use_zscores If true, converts log(expression) data to zscores based on reference cell expr distribution. -#' -#' @param remove_genes_at_chr_ends If true, removes the window_length/2 genes at both ends of the chromosome. +#' @param final_center_val Center value for final heatmap output by the run() method. +#' +#' @param debug If true, output debug level logging. +#' +#' @param num_threads (int) number of threads for parallel steps (default: 4) +#' +#' @param plot_steps If true, saves infercnv objects and plots data at the intermediate steps. +#' +#' @param resume_mode leverage pre-computed and stored infercnv objects where possible. (default=TRUE) +#' +#' @param png_res Resolution for png output. +#' +#' @param no_plot don't make any of the images. Instead, generate all non-image outputs as part of the run. 
(default: FALSE) +#' +#' @param no_prelim_plot don't make the preliminary infercnv image (default: FALSE) +#' +#' @param plot_probabilities option to plot posterior probabilities (default: TRUE) +#' +#' @param diagnostics option to create diagnostic plots after running the Bayesian model (default: FALSE) +#' +#' ####################### +#' ## Experimental options +#' +#' @param remove_genes_at_chr_ends experimental option: If true, removes the window_length/2 genes at both ends of the chromosome. +#' +#' @param prune_outliers Define outliers loosely as those that exceed the mean boundaries among all cells. These are set to the bounds. +#' +#' ## experimental opts involving DE analysis #' #' @param mask_nonDE_genes If true, sets genes not significantly differentially expressed between tumor/normal to -#' the mean value for the complete data set +#' the mean value for the complete data set (default: FALSE) #' #' @param mask_nonDE_pval p-value threshold for defining statistically significant DE genes between tumor/normal # #' @param test.use statistical test to use. (default: "wilcoxon") alternatives include 'perm' or 't'.' -#' -#' @return infercnv_obj containing filtered and transformed data #' -#' @param plot_steps If true, saves infercnv objects and plots data at the intermediate steps. +#' @param require_DE_all_normals If mask_nonDE_genes is set, those genes will be masked only if they are found as DE according to test.use and mask_nonDE_pval in each of the comparisons to normal cells options: {"any", "most", "all"} (default: "any") #' -#' @param include.spike If true, introduces an artificial spike-in of data at ~0x and 2x for scaling residuals between 0-2. (default: F) +#' other experimental opts #' -#' @param spike_in_chrs vector listing of chr names to use for modeling spike-ins (default: NULL - uses the two largest chrs. ex. 
c('chr1', 'chr2') ) +#' @param sim_method method for calibrating CNV levels in the i6 HMM (default: 'meanvar') #' -#' @param spike_in_multiplier vector of weights matching spike_in_chrs (default: c(0.01, 2.0) for modeling loss/gain of both chrs) +#' @param sim_foreground don't use... for debugging, developer option. #' -#' @param pseudocount Number of counts to add to each gene of each cell post-filtering of genes and cells and pre-total sum count normalization. (default: 0) +#' @param hspike_aggregate_normals instead of trying to model the different normal groupings individually, just merge them in the hspike. +#' +#' @return infercnv_obj containing filtered and transformed data #' -#' @param debug If true, output debug level logging. -#' #' @export #' @@ -82,719 +176,1020 @@ run <- function(infercnv_obj, # gene filtering settings cutoff=1, min_cells_per_gene=3, - + out_dir=".", - normalize_factor=NA, - window_length=101, - - num_ref_groups=NULL, - max_centered_threshold=NA, + ## smoothing params + window_length=101, + smooth_method=c('pyramidinal', 'runmeans'), - # noise settings - noise_filter=NA, - sd_amplifier = 1.5, + num_ref_groups=NULL, + ref_subtract_use_mean_bounds=TRUE, # observation cell clustering settings cluster_by_groups=FALSE, k_obs_groups=1, + hclust_method='ward.D2', + + max_centered_threshold=3, # or set to a specific value or "auto", or NA to turn off + scale_data=FALSE, + + ## HMM opts + HMM=FALSE, # turn on to auto-run the HMM prediction of CNV levels + ## tumor subclustering opts + HMM_transition_prob=1e-6, + HMM_report_by=c("subcluster","consensus","cell"), + HMM_type=c('i6', 'i3'), + HMM_i3_pval=0.05, + HMM_i3_use_KS=TRUE, + BayesMaxPNormal=0.5, + + ## some experimental params + #sim_method=c('meanvar', 'simple', 'splatter'), ## only meanvar supported, others experimental + sim_method='meanvar', + sim_foreground=FALSE, ## experimental + + + ## tumor subclustering options + analysis_mode=c('samples', 'subclusters', 'cells'), # for filtering 
and HMM + tumor_subcluster_partition_method=c('random_trees', 'qnorm', 'pheight', 'qgamma', 'shc'), + tumor_subcluster_pval=0.1, + + + ## noise settings + denoise=FALSE, + noise_filter=NA, + sd_amplifier = 1.5, + noise_logistic=FALSE, # if false, does complete 'noise' elimination. # outlier adjustment settings outlier_method_bound="average_bound", outlier_lower_bound=NA, outlier_upper_bound=NA, - hclust_method='complete', + ## misc options + final_scale_limits = NULL, + final_center_val = NULL, + debug=FALSE, #for debug level logging + num_threads = 4, + plot_steps=FALSE, + resume_mode=TRUE, + png_res=300, + plot_probabilities = TRUE, + diagnostics = FALSE, - anscombe_normalize=TRUE, - use_zscores=FALSE, + ## experimental options remove_genes_at_chr_ends=FALSE, + prune_outliers=FALSE, mask_nonDE_genes=FALSE, - mask_nonDE_pval=0.05, + mask_nonDE_pval=0.05, # use permissive threshold test.use='wilcoxon', - - plot_steps=FALSE, + require_DE_all_normals="any", - debug=FALSE, #for debug level logging - include.spike = FALSE, + hspike_aggregate_normals = FALSE, - # must specify both below if to be used, and must match in vec length - spike_in_chrs = NULL, # use defaults - spike_in_multiplier_vec = NULL, # use defaults - - pseudocount = 0 - - ) { + no_plot = FALSE, + no_prelim_plot = FALSE + +) { + smooth_method = match.arg(smooth_method) + HMM_report_by = match.arg(HMM_report_by) + analysis_mode = match.arg(analysis_mode) + tumor_subcluster_partition_method = match.arg(tumor_subcluster_partition_method) + HMM_type = match.arg(HMM_type) + if (debug) { flog.threshold(DEBUG) + } else { + flog.threshold(INFO) } flog.info(paste("::process_data:Start", sep="")) - + + infercnv.env$GLOBAL_NUM_THREADS <- num_threads if(out_dir != "." & !file.exists(out_dir)){ dir.create(out_dir) } - - step_count = 0; - + + step_count = 0; + step_count = step_count + 1 flog.info(sprintf("\n\n\tSTEP %d: incoming data\n", step_count)) - # Plot incremental steps. 
- if (plot_steps) { - - infercnv_obj_incoming_data <- infercnv_obj - save('infercnv_obj_incoming_data', file=file.path(out_dir, sprintf("%02d_incoming_data.infercnv_obj", step_count))) - + infercnv_obj_file=file.path(out_dir, sprintf("%02d_incoming_data.infercnv_obj", step_count)) + if (! (resume_mode & file.exists(infercnv_obj_file)) ) { + saveRDS(infercnv_obj, infercnv_obj_file) } - - - ################################################### + + ## ################################################# ## Step: removing insufficiently expressed genes step_count = step_count + 1 flog.info(sprintf("\n\n\tSTEP %02d: Removing lowly expressed genes\n", step_count)) - # Remove genes that aren't sufficiently expressed, according to min mean count cutoff. - # Examines the original (non-log-transformed) data, gets mean for each gene, and removes genes - # with mean values below cutoff. - - infercnv_obj <- require_above_min_mean_expr_cutoff(infercnv_obj, cutoff) - - ## require each gene to be present in a min number of cells for ref sets + ## Remove genes that aren't sufficiently expressed, according to min mean count cutoff. + ## Examines the original (non-log-transformed) data, gets mean for each gene, and removes genes + ## with mean values below cutoff. 
- infercnv_obj <- require_above_min_cells_ref(infercnv_obj, min_cells_per_gene=min_cells_per_gene) + infercnv_obj_file = file.path(out_dir, sprintf("%02d_reduced_by_cutoff.infercnv_obj",step_count)) - - if (plot_steps){ + if (resume_mode & file.exists(infercnv_obj_file) ) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj = readRDS(infercnv_obj_file) + } else { - infercnv_obj_low_expr_genes_pruned <- infercnv_obj + infercnv_obj <- require_above_min_mean_expr_cutoff(infercnv_obj, cutoff) - save('infercnv_obj_low_expr_genes_pruned', file=file.path(out_dir, sprintf("%02d_reduced_by_cutoff.infercnv_obj",step_count))) + ## require each gene to be present in a min number of cells for ref sets - } - - - #################################################### - ## Adding pseudocounts - - if (pseudocount != 0) { - flog.info(sprintf("Adding pseudocount: %g", pseudocount)) - infercnv_obj <- add_pseudocount(infercnv_obj, pseudocount) - } + infercnv_obj <- require_above_min_cells_ref(infercnv_obj, min_cells_per_gene=min_cells_per_gene) + saveRDS(infercnv_obj, file=infercnv_obj_file) + } - ########################################### - ### STEP: normalization by sequencing depth + ## ######################################### + ## # STEP: normalization by sequencing depth + step_count = step_count + 1 flog.info(sprintf("\n\n\tSTEP %02d: normalization by sequencing depth\n", step_count)) - infercnv_obj <- normalize_counts_by_seq_depth(infercnv_obj, normalize_factor=normalize_factor) - - if (plot_steps){ - - infercnv_obj_normalize_by_depth <- infercnv_obj - save('infercnv_obj_normalize_by_depth', file=file.path(out_dir, sprintf("%02d_normalized_by_depth.infercnv_obj", step_count))) - - } - - ################################################## - ## spike-in + resume_file_token = ifelse( (HMM), paste0("HMM",HMM_type), "") - if (include.spike) { - step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: Spiking in genes with variation added 
for tracking\n", step_count)) - - if (! (is.null(spike_in_chrs) && is.null(spike_in_multiplier_vec)) ) { - infercnv_obj <- spike_in_variation_chrs(infercnv_obj, spike_in_chrs, spike_in_multiplier_vec) - } else { - infercnv_obj <- spike_in_variation_chrs(infercnv_obj) - } + infercnv_obj_file = file=file.path(out_dir, sprintf("%02d_normalized_by_depth%s.infercnv_obj", step_count, resume_file_token)) + + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) - # Plot incremental steps. - if (plot_steps){ - - infercnv_obj_spiked <- infercnv_obj - save('infercnv_obj_spiked', file=file.path(out_dir, sprintf("%02d_spiked.infercnv_obj", step_count))) - - plot_cnv(infercnv_obj=infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, - x.center=mean(infercnv_obj@expr.data), - x.range="auto", - title=sprintf("%02d_spike_added",step_count), - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_spike_added",step_count), - write_expr_matrix=TRUE - ) - } + } else { + infercnv_obj <- normalize_counts_by_seq_depth(infercnv_obj) - } - - - ################################## - ##### STEP: anscombe normalization - - if (anscombe_normalize) { - step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: anscombe normalization\n", step_count)) - - infercnv_obj <- anscombe_transform(infercnv_obj) - - if (plot_steps) { - infercnv_obj_anscombe_norm <- infercnv_obj - save('infercnv_obj_anscombe_norm', file=file.path(out_dir, sprintf("%02d_anscombe_normalization.infercnv_obj", step_count))) - - plot_cnv(infercnv_obj=infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, - x.center=mean(infercnv_obj@expr.data), - x.range="auto", - title=sprintf("%02d_anscombe_norm",step_count), - 
obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_anscombe_norm",step_count), - write_expr_matrix=TRUE - ) + if (HMM && HMM_type == 'i6') { + ## add in the hidden spike needed by the HMM + infercnv_obj <- .build_and_add_hspike(infercnv_obj, sim_method=sim_method, aggregate_normals=hspike_aggregate_normals) + if (sim_foreground) { + infercnv_obj <- .sim_foreground(infercnv_obj, sim_method=sim_method) + } } + saveRDS(infercnv_obj, infercnv_obj_file) } + - ########################### + ## ######################### ## Step: log transformation - + step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: log transformation of data\n", step_count)) - - infercnv_obj <- log2xplus1(infercnv_obj) - - # Plot incremental steps. - if (plot_steps){ - - infercnv_obj_log_transformed <- infercnv_obj - save('infercnv_obj_log_transformed', file=file.path(out_dir, sprintf("%02d_logtransformed.infercnv_obj", step_count))) - - plot_cnv(infercnv_obj=infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, - x.center=mean(infercnv_obj@expr.data), - x.range="auto", - title=sprintf("%02d_log_transformed_data",step_count), - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_log_transformed",step_count), - write_expr_matrix=TRUE - ) - } + flog.info(sprintf("\n\n\tSTEP %02d: log transformation of data\n", step_count)) + infercnv_obj_file = file.path(out_dir, sprintf("%02d_logtransformed%s.infercnv_obj", step_count, resume_file_token)) - - ############################### - ### STEP: ZScore transformation - - if (use_zscores) { + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { - step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: Z-score transformation of 
data\n", step_count)) + infercnv_obj <- log2xplus1(infercnv_obj) - infercnv_obj <- transform_to_reference_based_Zscores(infercnv_obj) + saveRDS(infercnv_obj, + file=infercnv_obj_file) + ## Plot incremental steps. if (plot_steps){ - infercnv_obj_zscores <- infercnv_obj - - save('infercnv_obj_zscores', file=file.path(out_dir, sprintf("%02d_Z-scores.infercnv_obj", step_count))) - plot_cnv(infercnv_obj=infercnv_obj, k_obs_groups=k_obs_groups, cluster_by_groups=cluster_by_groups, out_dir=out_dir, - color_safe_pal=FALSE, - x.center=0, - x.range="auto", - title=sprintf("%02d_centering_gene_expr",step_count), - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_centering_gene_expr",step_count), - write_expr_matrix=TRUE) - + title=sprintf("%02d_log_transformed_data",step_count), + output_filename=sprintf("infercnv.%02d_log_transformed",step_count), + write_expr_matrix=TRUE, + png_res=png_res + ) } } - - - ####################################################### - ## Apply maximum centered expression thresholds to data - # Cap values between threshold and -threshold, retaining earlier center - - step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: apply max centered expression threshold\n", step_count)) - - threshold = max_centered_threshold - if (is.na(max_centered_threshold)) { - threshold = mean(abs(get_average_bounds(infercnv_obj))) - } - infercnv_obj <- apply_max_threshold_bounds(infercnv_obj, threshold=threshold) - # Plot incremental steps. 
- if (plot_steps){ - - infercnv_obj_max_centered_expr <- infercnv_obj + if (scale_data) { - save('infercnv_obj_max_centered_expr', file=file.path(out_dir, sprintf("%02d_apply_max_centered_expr_threshold.infercnv_obj", step_count))) - - plot_cnv(infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, - x.center=mean(infercnv_obj@expr.data), - x.range="auto", - title=sprintf("%02d_apply_max_centered_expr_threshold",step_count), - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_apply_max_centred_expr_threshold",step_count), - write_expr_matrix=TRUE) + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: scaling all expression data\n", step_count)) + infercnv_obj_file=file.path(out_dir, sprintf("%02d_scaled%s.infercnv_obj", step_count, resume_file_token)) + + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + + infercnv_obj <- scale_infercnv_expr(infercnv_obj) + + saveRDS(infercnv_obj, file=infercnv_obj_file) + + ## Plot incremental steps. 
+ if (plot_steps){ + + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_scaled",step_count), + output_filename=sprintf("infercnv.%02d_scaled",step_count), + write_expr_matrix=TRUE, + png_res=png_res) + + } + } } - ########################################################################### - # Step: For each cell, smooth the data along chromosome with gene windows - - step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: Smoothing data per cell by chromosome\n", step_count)) - - infercnv_obj <- smooth_by_chromosome(infercnv_obj, window_length=window_length, smooth_ends=TRUE) - + ## ################################################# + ## Step: Split the reference data into groups if requested - # Plot incremental steps. - if (plot_steps){ + if (!is.null(num_ref_groups)) { + + if (! has_reference_cells(infercnv_obj)) { + stop("Error, no reference cells defined. Cannot split them into groups as requested") + } + + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: splitting reference data into %d clusters\n", step_count, num_ref_groups)) - infercnv_obj_smoothed_by_chr <- infercnv_obj - save('infercnv_obj_smoothed_by_chr', file=file.path(out_dir, sprintf("%02d_smoothed_by_chr.infercnv_obj", step_count))) + infercnv_obj_file = file.path(out_dir, sprintf("%02d_split_%02d_refs%s.infercnv_obj", step_count, resume_file_token, num_ref_groups)) + + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + infercnv_obj <- split_references(infercnv_obj, + num_groups=num_ref_groups, + hclust_method=hclust_method) + saveRDS(infercnv_obj, file=infercnv_obj_file) + } - plot_cnv(infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, - x.center=mean(infercnv_obj@expr.data), - 
x.range="auto", - title=sprintf("%02d_smoothed_by_chr",step_count), - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_smoothed_by_chr", step_count)) } - - ## - # Step: - # Center cells/observations after smoothing. This helps reduce the - # effect of complexity. - - step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: re-centering data across chromosome after smoothing\n", step_count)) - - infercnv_obj <- center_cell_expr_across_chromosome(infercnv_obj, method="median") - - # Plot incremental steps. - if (plot_steps) { - - infercnv_obj_cell_centered <- infercnv_obj + if (analysis_mode == 'subclusters' & tumor_subcluster_partition_method == 'random_trees') { - save('infercnv_obj_cell_centered', file=file.path(out_dir, sprintf("%02d_recentered_cells_by_chr.infercnv_obj", step_count))) + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: computing tumor subclusters via %s\n", step_count, tumor_subcluster_partition_method)) - plot_cnv(infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, - x.center=mean(infercnv_obj@expr.data), - x.range="auto", - title=sprintf("%02d_centering_of_smoothed",step_count), - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_centering_of_smoothed", step_count)) + resume_file_token = paste0(resume_file_token, ".rand_trees") + infercnv_obj_file = file.path(out_dir, sprintf("%02d_tumor_subclusters%s.%s.infercnv_obj", step_count, resume_file_token, tumor_subcluster_partition_method)) + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + infercnv_obj <- define_signif_tumor_subclusters_via_random_smooothed_trees(infercnv_obj, + p_val=tumor_subcluster_pval, + hclust_method=hclust_method) + 
saveRDS(infercnv_obj, file=infercnv_obj_file) + + if (plot_steps) { + + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_tumor_subclusters.%s", step_count, tumor_subcluster_partition_method), + output_filename=sprintf("infercnv.%02d_tumor_subclusters.%s", step_count, tumor_subcluster_partition_method), + write_expr_matrix=TRUE, + png_res=png_res) + + } + } } - ################################################### - # Step: Split the reference data into groups if requested - - if (!is.null(num_ref_groups)) { - - step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: splitting reference data into %d clusters\n", step_count, num_ref_groups)) + else if (analysis_mode != 'subclusters') { - infercnv_obj <- split_references(infercnv_obj, - num_groups=num_ref_groups, - hclust_method=hclust_method) + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: Clustering samples (not defining tumor subclusters)\n", step_count)) + infercnv_obj_file = file.path(out_dir, sprintf("%02d_no_subclustering%s.infercnv_obj", step_count, resume_file_token)) + ## just need to be sure that the cells are clustered per sample + ## so running with partition_mode='none' + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + + + infercnv_obj <- define_signif_tumor_subclusters(infercnv_obj, + p_val=tumor_subcluster_pval, + hclust_method=hclust_method, + partition_method='none') + + saveRDS(infercnv_obj, file=infercnv_obj_file) + } } - #################################### + ## ################################## ## Step: Subtract average reference ## Since we're in log space, this now becomes log(fold_change) - - step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: removing average of reference data\n", step_count)) - infercnv_obj <- 
subtract_ref_expr_from_obs(infercnv_obj, inv_log=TRUE) + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: removing average of reference data (before smoothing)\n", step_count)) + infercnv_obj_file = file.path(out_dir, + sprintf("%02d_remove_ref_avg_from_obs_logFC%s.infercnv_obj", step_count, resume_file_token)) - # Plot incremental steps. - if (plot_steps){ - - infercnv_obj_subtract_ref <- infercnv_obj + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + infercnv_obj <- subtract_ref_expr_from_obs(infercnv_obj, inv_log=FALSE, use_bounds=ref_subtract_use_mean_bounds) - save('infercnv_obj_subtract_ref', file=file.path(out_dir, sprintf("%02d_remove_ref_avg_from_obs.infercnv_obj", step_count))) - - plot_cnv(infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, - x.center=0, - x.range="auto", - title=sprintf("%02d_remove_average",step_count), - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_remove_average", step_count)) + saveRDS(infercnv_obj, file=infercnv_obj_file) + if (plot_steps) { + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_remove_average",step_count), + output_filename=sprintf("infercnv.%02d_remove_average", step_count), + write_expr_matrix=TRUE, + png_res=png_res) + } } - - ## Step 08: - # Remove Ends - - if (remove_genes_at_chr_ends == TRUE) { - + + if (! 
is.na(max_centered_threshold)) { + + ## ##################################################### + ## Apply maximum centered expression thresholds to data + ## Cap values between threshold and -threshold, retaining earlier center + step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: removing genes at chr ends\n", step_count)) + flog.info(sprintf("\n\n\tSTEP %02d: apply max centered expression threshold: %s\n", step_count, max_centered_threshold)) - infercnv_obj <- remove_genes_at_ends_of_chromosomes(infercnv_obj, window_length) + infercnv_obj_file=file.path(out_dir, sprintf("%02d_apply_max_centered_expr_threshold%s.infercnv_obj", step_count, resume_file_token)) - # Plot incremental steps. - if (plot_steps){ + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + + threshold = max_centered_threshold + if (is.character(max_centered_threshold) && max_centered_threshold == "auto") { + threshold = mean(abs(get_average_bounds(infercnv_obj))) + flog.info(sprintf("Setting max centered threshoolds via auto to: +- %g", threshold)) + } + + infercnv_obj <- apply_max_threshold_bounds(infercnv_obj, threshold=threshold) + + saveRDS(infercnv_obj, file=infercnv_obj_file) - infercnv_obj_remove_chr_end_genes <- infercnv_obj - save('infercnv_obj_remove_chr_end_genes', file=file.path(out_dir, sprintf("%02d_remove_gene_at_chr_ends.infercnv_obj", step_count))) + ## Plot incremental steps. 
+ if (plot_steps){ + + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_apply_max_centered_expr_threshold",step_count), + output_filename=sprintf("infercnv.%02d_apply_max_centred_expr_threshold",step_count), + write_expr_matrix=TRUE, + png_res=png_res) + + } + } + } + + + + + + + + ## ######################################################################### + ## Step: For each cell, smooth the data along chromosome with gene windows + + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: Smoothing data per cell by chromosome\n", step_count)) + + infercnv_obj_file = file.path(out_dir, sprintf("%02d_smoothed_by_chr%s.infercnv_obj", step_count, resume_file_token)) + + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + + if (smooth_method == 'runmeans') { + + infercnv_obj <- smooth_by_chromosome_runmeans(infercnv_obj, window_length) + } else if (smooth_method == 'pyramidinal') { + + infercnv_obj <- smooth_by_chromosome(infercnv_obj, window_length=window_length, smooth_ends=TRUE) + } else { + stop(sprintf("Error, don't recognize smoothing method: %s", smooth_method)) + } + + saveRDS(infercnv_obj, file=infercnv_obj_file) + + ## Plot incremental steps. 
+ if (plot_steps){ plot_cnv(infercnv_obj, k_obs_groups=k_obs_groups, cluster_by_groups=cluster_by_groups, out_dir=out_dir, - color_safe_pal=FALSE, - x.center=0, - x.range="auto", - title=sprintf("%02d_remove_genes_at_chr_ends",step_count), - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_remove_genes_at_chr_ends",step_count), - write_expr_matrix=TRUE) + title=sprintf("%02d_smoothed_by_chr",step_count), + output_filename=sprintf("infercnv.%02d_smoothed_by_chr", step_count), + write_expr_matrix=TRUE, + png_res=png_res) + } + } + + + ## + ## Step: + ## Center cells/observations after smoothing. This helps reduce the + ## effect of complexity. + + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: re-centering data across chromosome after smoothing\n", step_count)) + + infercnv_obj_file = file.path(out_dir, sprintf("%02d_recentered_cells_by_chr%s.infercnv_obj", step_count, resume_file_token)) + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + infercnv_obj <- center_cell_expr_across_chromosome(infercnv_obj, method="median") + + saveRDS(infercnv_obj, file=infercnv_obj_file) + + ## Plot incremental steps. 
+ if (plot_steps) { + + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_centering_of_smoothed",step_count), + output_filename=sprintf("infercnv.%02d_centering_of_smoothed", step_count), + write_expr_matrix=TRUE, + png_res=png_res) } } - ############################# - # Step: invert log transform (convert from log(FC) to FC) - + + ## ################################## + ## Step: Subtract average reference (adjustment) + step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: invert log2(FC) to FC\n", step_count)) + flog.info(sprintf("\n\n\tSTEP %02d: removing average of reference data (after smoothing)\n", step_count)) - infercnv_obj <- invert_log2(infercnv_obj) - - # Plot incremental steps. - if (plot_steps) { + infercnv_obj_file = file.path(out_dir, + sprintf("%02d_remove_ref_avg_from_obs_adjust%s.infercnv_obj", step_count, resume_file_token)) + + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + infercnv_obj <- subtract_ref_expr_from_obs(infercnv_obj, inv_log=FALSE, use_bounds=ref_subtract_use_mean_bounds) - infercnv_obj_invert_log_transform <- infercnv_obj + saveRDS(infercnv_obj, file=infercnv_obj_file) - save('infercnv_obj_invert_log_transform', file=file.path(out_dir, sprintf("%02d_invert_log_transform.infercnv_obj", step_count))) + if (plot_steps) { + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_remove_average",step_count), + output_filename=sprintf("infercnv.%02d_remove_average", step_count), + write_expr_matrix=TRUE, + png_res=png_res) + } + } + + + ## Step: Remove Ends + + if (remove_genes_at_chr_ends == TRUE) { - plot_cnv(infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, 
- x.center=1, - x.range="auto", - title=sprintf("%02d_invert_log_transform log(FC)->FC",step_count), - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_invert_log_FC",step_count), - write_expr_matrix=TRUE) + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: removing genes at chr ends\n", step_count)) + + infercnv_obj_file = file.path(out_dir, sprintf("%02d_remove_gene_at_chr_ends%s.infercnv_obj", step_count, resume_file_token)) + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + infercnv_obj <- remove_genes_at_ends_of_chromosomes(infercnv_obj, window_length) + + saveRDS(infercnv_obj, file=infercnv_obj_file) + + ## Plot incremental steps. + if (plot_steps){ + + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_remove_genes_at_chr_ends",step_count), + output_filename=sprintf("infercnv.%02d_remove_genes_at_chr_ends",step_count), + write_expr_matrix=TRUE, + png_res=png_res) + + } + } } - ################################ - # Step: de-noising - + ## ########################### + ## Step: invert log transform (convert from log(FC) to FC) + step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: Denoising\n", step_count)) + flog.info(sprintf("\n\n\tSTEP %02d: invert log2(FC) to FC\n", step_count)) - if (! is.na(noise_filter)) { - - if (noise_filter > 0) { - flog.info(paste("::process_data:Remove noise, noise threshold at: ", noise_filter)) - infercnv_obj <- clear_noise(infercnv_obj, - threshold=noise_filter) - } - else { - # noise == 0 or negative... - # don't remove noise. 
- } + infercnv_obj_file = file.path(out_dir, sprintf("%02d_invert_log_transform%s.infercnv_obj", step_count, resume_file_token)) + + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + infercnv_obj <- invert_log2(infercnv_obj) + + saveRDS(infercnv_obj, file=infercnv_obj_file) + + if (plot_steps) { + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_invert_log_transform log(FC)->FC",step_count), + output_filename=sprintf("infercnv.%02d_invert_log_FC",step_count), + write_expr_matrix=TRUE, + png_res=png_res) + + } } - else { - # default, use quantiles, if NA - flog.info(paste("::process_data:Remove noise, noise threshold defined via ref mean sd_amplifier: ", sd_amplifier)) - infercnv_obj <- clear_noise_via_ref_mean_sd(infercnv_obj, - sd_amplifier = sd_amplifier) - } - - - - if (plot_steps){ + + ## ################################################################### + ## Done restoring infercnv_obj's from files now under resume_mode + ## ################################################################### + + if (analysis_mode == 'subclusters' & tumor_subcluster_partition_method != 'random_trees') { - infercnv_obj_denoised <- infercnv_obj + resume_file_token = paste0(resume_file_token, '.', tumor_subcluster_partition_method) - save('infercnv_obj_denoised', file=file.path(out_dir, sprintf("%02d_denoise.infercnv_obj", step_count))) + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: computing tumor subclusters via %s\n", step_count, tumor_subcluster_partition_method)) - plot_cnv(infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, - x.center=1, - x.range="auto", - title=sprintf("%02d_denoised", step_count), - obs_title="Observations (Cells)", - ref_title="References 
(Cells)", - output_filename=sprintf("infercnv.%02d_denoised", step_count)) + infercnv_obj_file = file.path(out_dir, sprintf("%02d_tumor_subclusters%s.infercnv_obj", step_count, resume_file_token)) + if (resume_mode && file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + infercnv_obj <- define_signif_tumor_subclusters(infercnv_obj, + p_val=tumor_subcluster_pval, + hclust_method=hclust_method, + partition_method=tumor_subcluster_partition_method) + + saveRDS(infercnv_obj, file=infercnv_obj_file) + + if (plot_steps) { + + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_tumor_subclusters",step_count), + output_filename=sprintf("infercnv.%02d_tumor_subclusters",step_count), + write_expr_matrix=TRUE, + png_res=png_res) + } + + } } - ################################## - # STEP: Remove outliers for viz - - step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: Removing outliers\n", step_count)) - - infercnv_obj = remove_outliers_norm(infercnv_obj, - out_method=outlier_method_bound, - lower_bound=outlier_lower_bound, - upper_bound=outlier_upper_bound) + ## This is a milestone step and results should always be examined here. + infercnv_obj_prelim <- infercnv_obj + infercnv_obj_file = file.path(out_dir, "preliminary.infercnv_obj") + saveRDS(infercnv_obj_prelim, file=infercnv_obj_file) + if (! (no_prelim_plot | no_plot) ) { + + prelim_heatmap_png = "infercnv.preliminary.png" + + if (! file.exists(file.path(out_dir, prelim_heatmap_png))) { + plot_cnv(infercnv_obj_prelim, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title="Preliminary infercnv (pre-noise filtering)", + output_filename="infercnv.preliminary", # png ext auto added + write_expr_matrix=TRUE, + png_res=png_res) + } + } - # Plot incremental steps. 
- if (plot_steps) { - - infercnv_obj_remove_outliers <- infercnv_obj - - save('infercnv_obj_remove_outliers', file=file.path(out_dir, sprintf("%02d_remove_outlier.infercnv_obj", step_count))) + ## Below represent optional downstream analysis steps: + + if (prune_outliers) { - plot_cnv(infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, - x.center=1, - x.range="auto", - title=sprintf("%02d_removed_outliers",step_count), - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_removed_outliers", step_count)) + ## ################################ + ## STEP: Remove outliers for viz + + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: Removing outliers\n", step_count)) + + infercnv_obj_file = file.path(out_dir, sprintf("%02d_remove_outlier%s.infercnv_obj", step_count, resume_file_token)) + + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + + infercnv_obj = remove_outliers_norm(infercnv_obj, + out_method=outlier_method_bound, + lower_bound=outlier_lower_bound, + upper_bound=outlier_upper_bound) + + saveRDS(infercnv_obj, file=infercnv_obj_file) + + ## Plot incremental steps. + if (plot_steps) { + + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_removed_outliers",step_count), + output_filename=sprintf("infercnv.%02d_removed_outliers", step_count), + write_expr_matrix=TRUE, + png_res=png_res) + } + } } - - - - # define heatmap thresholds for final plots. 
- plot_data = infercnv_obj@expr.data - high_threshold = max(abs(quantile(plot_data[plot_data != 0], c(0.05, 0.95)))) - low_threshold = -1 * high_threshold - if (include.spike) { - + if (HMM) { step_count = step_count + 1 - flog.info(sprintf("\n\n\tSTEP %02d: Scaling according to spike\n", step_count)) + flog.info(sprintf("\n\n\tSTEP %02d: HMM-based CNV prediction\n", step_count)) + + hmm_resume_file_token = paste0(resume_file_token, ".hmm_mode-", analysis_mode) + + hmm.infercnv_obj_file = file.path(out_dir, sprintf("%02d_HMM_pred%s.infercnv_obj", step_count, hmm_resume_file_token)) + + if (resume_mode & file.exists(hmm.infercnv_obj_file)) { + flog.info(sprintf("-restoring hmm.infercnv_obj from %s", hmm.infercnv_obj_file)) + hmm.infercnv_obj <- readRDS(hmm.infercnv_obj_file) + } else { + + + if (HMM_type == 'i6') { + hmm_center = 3 + hmm_state_range = c(0,6) + } else { + ## i3 + hmm_center = 2 + hmm_state_range = c(1,3) + } + + if (analysis_mode == 'subclusters') { - # normalize by spike - infercnv_obj <- scale_cnv_by_spike(infercnv_obj) - - # now thresholds should be between 0 and 2 after spike-based scaling - low_threshold = 0 - high_threshold = 2 + if (HMM_type == 'i6') { + hmm.infercnv_obj <- predict_CNV_via_HMM_on_tumor_subclusters(infercnv_obj, + t=HMM_transition_prob) + } else if (HMM_type == 'i3') { + hmm.infercnv_obj <- i3HMM_predict_CNV_via_HMM_on_tumor_subclusters(infercnv_obj, + i3_p_val=HMM_i3_pval, + t=HMM_transition_prob, + use_KS=HMM_i3_use_KS) + } else { + stop("Error, not recognizing HMM_type") + } + + } else if (analysis_mode == 'cells') { + + if (HMM_type == 'i6') { + hmm.infercnv_obj <- predict_CNV_via_HMM_on_indiv_cells(infercnv_obj, t=HMM_transition_prob) + } else if (HMM_type == 'i3') { + hmm.infercnv_obj <- i3HMM_predict_CNV_via_HMM_on_indiv_cells(infercnv_obj, + i3_p_val=HMM_i3_pval, + t=HMM_transition_prob, + use_KS=HMM_i3_use_KS) + } else { + stop("Error, not recognizing HMM_type") + } + + + } else { + ## samples mode + + if (HMM_type == 
'i6') { + hmm.infercnv_obj <- predict_CNV_via_HMM_on_whole_tumor_samples(infercnv_obj, t=HMM_transition_prob) + } else if (HMM_type == 'i3') { + hmm.infercnv_obj <- i3HMM_predict_CNV_via_HMM_on_tumor_subclusters(infercnv_obj, + i3_p_val=HMM_i3_pval, + t=HMM_transition_prob, + use_KS=HMM_i3_use_KS + ) + } else { + stop("Error, not recognizing HMM_type") + } + + } + + ## ################################## + ## Note, HMM invercnv object is only leveraged here, but stored as file for future use: + ## ################################## + + + saveRDS(hmm.infercnv_obj, file=hmm.infercnv_obj_file) + + ## report predicted cnv regions: + generate_cnv_region_reports(hmm.infercnv_obj, + output_filename_prefix=sprintf("%02d_HMM_preds", step_count), + out_dir=out_dir, + ignore_neutral_state=hmm_center, + by=HMM_report_by) + + + + + if (! no_plot) { + + ## Plot HMM pred img + plot_cnv(infercnv_obj=hmm.infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_HMM_preds",step_count), + output_filename=sprintf("infercnv.%02d_HMM_pred%s",step_count, hmm_resume_file_token), + write_expr_matrix=TRUE, + x.center=hmm_center, + x.range=hmm_state_range, + png_res=png_res + ) + } + } - if (plot_steps) { - infercnv_obj_scaled_by_spike <- infercnv_obj - save('infercnv_obj_scaled_by_spike', file=file.path(out_dir, sprintf("%02d_scaled_by_spike.infercnv_obj", step_count))) + ## ############################################################ + ## Bayesian Network Mixture Model + ## ############################################################ + + if (HMM_type == 'i6' & BayesMaxPNormal > 0) { + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: Run Bayesian Network Model on HMM predicted CNV's\n", step_count)) - plot_cnv(infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, - x.center=1, - x.range=c(low_threshold, high_threshold), - 
title=sprintf("%02d_scaled_by_spike",step_count), - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_scaled_by_spike", step_count)) + ## the MCMC object + mcmc_obj_file = file.path(out_dir, sprintf("%02d_HMM_pred.Bayes_Net%s.mcmc_obj", + step_count, hmm_resume_file_token)) + + if (resume_mode & file.exists(mcmc_obj_file)) { + flog.info(sprintf("-restoring mcmc_obj from %s", mcmc_obj_file)) + mcmc_obj <- readRDS(mcmc_obj_file) + } else { + + mcmc_obj <- infercnv::inferCNVBayesNet( infercnv_obj = infercnv_obj_prelim, + HMM_obj = hmm.infercnv_obj, + file_dir = out_dir, + postMcmcMethod = "removeCNV", + out_dir = file.path(out_dir, sprintf("BayesNetOutput.%s", hmm_resume_file_token)), + quietly = TRUE, + CORES = num_threads, + plotingProbs = plot_probabilities, + diagnostics = diagnostics) + saveRDS(mcmc_obj, file=mcmc_obj_file) + } + + ## Filter CNV's by posterior Probabilities + mcmc_obj <- infercnv::filterHighPNormals( MCMC_inferCNV_obj = mcmc_obj, + BayesMaxPNormal = BayesMaxPNormal) + + ## Create new inferCNV objecrt with CNV's removed + hmm.infercnv_obj <- infercnv::returningInferCNV(mcmc_obj, hmm.infercnv_obj) + + ## Save the MCMC inferCNV object + mcmc.infercnv_obj_file = file.path(out_dir, sprintf("%02d_HMM_pred.Bayes_Net%s.Pnorm_%g.infercnv_obj", + step_count, hmm_resume_file_token, BayesMaxPNormal)) + + saveRDS(hmm.infercnv_obj, file=mcmc.infercnv_obj_file) + + if (! 
no_plot) { + ## Plot HMM pred img after cnv removal + plot_cnv(infercnv_obj=hmm.infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_HMM_preds_Bayes_Net",step_count), + output_filename=sprintf("infercnv.%02d_HMM_pred.Bayes_Net.Pnorm_%g",step_count, BayesMaxPNormal), + write_expr_matrix=TRUE, + x.center=3, + x.range=c(0,6), + png_res=png_res + ) + } } + + ## convert from states to representative intensity values + + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: Converting HMM-based CNV states to repr expr vals\n", step_count)) + + hmm.infercnv_obj_file = file.path(out_dir, sprintf("%02d_HMM_pred.repr_intensities%s.Pnorm_%g.infercnv_obj", + step_count, hmm_resume_file_token, BayesMaxPNormal)) + + if (resume_mode & file.exists(hmm.infercnv_obj_file)) { + flog.info(sprintf("-restoring hmm.infercnv_obj from %s", hmm.infercnv_obj_file)) + hmm.infercnv_obj <- readRDS(hmm.infercnv_obj_file) + } else { + + if (HMM_type == 'i6') { + hmm.infercnv_obj <- assign_HMM_states_to_proxy_expr_vals(hmm.infercnv_obj) + } else if (HMM_type == 'i3') { + hmm.infercnv_obj <- i3HMM_assign_HMM_states_to_proxy_expr_vals(hmm.infercnv_obj) + } + + saveRDS(hmm.infercnv_obj, file=hmm.infercnv_obj_file) + + ## Plot HMM pred img + if (! 
no_plot) { + plot_cnv(infercnv_obj=hmm.infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_HMM_preds.repr_intensities",step_count), + output_filename=sprintf("infercnv.%02d_HMM_pred%s.repr_intensities", step_count, hmm_resume_file_token), + write_expr_matrix=TRUE, + x.center=1, + x.range=c(-1,3), + png_res=png_res + ) + } + + ## write the adjusted CNV report files + ## report predicted cnv regions: + generate_cnv_region_reports(hmm.infercnv_obj, + output_filename_prefix=sprintf("HMM_CNV_predictions.%s.Pnorm_%g", hmm_resume_file_token, BayesMaxPNormal), + out_dir=out_dir, + ignore_neutral_state=1, + by=HMM_report_by) + + } } - + ## all processes that are alternatives to the HMM prediction wrt DE analysis and/or denoising ## Step: Filtering significantly DE genes if (mask_nonDE_genes) { - + + if (!has_reference_cells(infercnv_obj)) { + stop("Error, cannot mask non-DE genes when there are no normal references set") + } + step_count = step_count + 1 flog.info(sprintf("\n\n\tSTEP %02d: Identify and mask non-DE genes\n", step_count)) - - infercnv_obj <- mask_non_DE_genes_basic(infercnv_obj, test.use = test.use, center_val=mean(plot_data)) - - - # Plot incremental steps. 
- if (plot_steps) { + + infercnv_obj_file = file.path(out_dir, sprintf("%02d_mask_nonDE%s.infercnv_obj", step_count, resume_file_token)) + + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { - infercnv_obj_mask_nonDE <- infercnv_obj + infercnv_obj <- mask_non_DE_genes_basic(infercnv_obj, + p_val_thresh=mask_nonDE_pval, + test.use = test.use, + center_val=mean(infercnv_obj@expr.data), + require_DE_all_normals=require_DE_all_normals) - save('infercnv_obj_mask_nonDE', file=file.path(out_dir, sprintf("%02d_mask_nonDE.infercnv_obj", step_count))) - - plot_cnv(infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, - x.center=1, - x.range=c(low_threshold,high_threshold), - title=sprintf("%02d_mask_nonDE",step_count), - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename=sprintf("infercnv.%02d_mask_nonDE", step_count)) + saveRDS(infercnv_obj, file=infercnv_obj_file) + ## Plot incremental steps. + if (plot_steps) { + + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + title=sprintf("%02d_mask_nonDE",step_count), + output_filename=sprintf("infercnv.%02d_mask_nonDE", step_count), + write_expr_matrix=TRUE, + png_res=png_res) + + } } } - - if (include.spike) { - # remove the spike before making the final plot. 
- infercnv_obj <- remove_spike(infercnv_obj) - } - save('infercnv_obj', file=file.path(out_dir, "run.final.infercnv_obj")) + if (denoise) { + + ## ############################## + ## Step: de-noising + + step_count = step_count + 1 + flog.info(sprintf("\n\n\tSTEP %02d: Denoising\n", step_count)) + + infercnv_obj_file = file.path(out_dir, sprintf("%02d_denoise%s.NF_%s.SD_%g.NL_%s.infercnv_obj", + step_count, resume_file_token, + noise_filter, sd_amplifier, noise_logistic)) + + + if (resume_mode & file.exists(infercnv_obj_file)) { + flog.info(sprintf("-restoring infercnv_obj from %s", infercnv_obj_file)) + infercnv_obj <- readRDS(infercnv_obj_file) + } else { + + + if (! is.na(noise_filter)) { + + if (noise_filter > 0) { + flog.info(paste("::process_data:Remove noise, noise threshold at: ", noise_filter)) + infercnv_obj <- clear_noise(infercnv_obj, + threshold=noise_filter, + noise_logistic=noise_logistic) + } + else { + ## noise == 0 or negative... + ## don't remove noise. + } + + } + else { + ## default, use quantiles, if NA + flog.info(paste("::process_data:Remove noise, noise threshold defined via ref mean sd_amplifier: ", sd_amplifier)) + infercnv_obj <- clear_noise_via_ref_mean_sd(infercnv_obj, + sd_amplifier = sd_amplifier, + noise_logistic=noise_logistic) + } + + saveRDS(infercnv_obj, file=infercnv_obj_file) + + + if (! 
no_plot) { + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + color_safe_pal=FALSE, + title=sprintf("%02d_denoised", step_count), + output_filename=sprintf("infercnv.%02d_denoised", step_count), + write_expr_matrix=TRUE, + png_res=png_res) + } + + } + } + - flog.info("Making the final infercnv heatmap") - plot_cnv(infercnv_obj, - k_obs_groups=k_obs_groups, - cluster_by_groups=cluster_by_groups, - out_dir=out_dir, - color_safe_pal=FALSE, - x.center=1, - x.range=c(low_threshold,high_threshold), - title="inferCNV", - obs_title="Observations (Cells)", - ref_title="References (Cells)", - output_filename="infercnv") + saveRDS(infercnv_obj, file=file.path(out_dir, "run.final.infercnv_obj")) - - return(infercnv_obj) - -} - -#' Function for Generating a next-generation heatmap -#' -#' @title ngchm() : generates next gen heatmap -#' -#' @param infercnv_obj An infercnv object -#' -#' @param out_dir output directory (default: '.') -#' -#' @param title title of the interactive heatmap (default: "NGCHM") -#' -#' @param gene_symbol ##TODO (default: NULL) -#' -#' @param path_to_shaidyMapGen path to the shaidyMapGen jar file (default: NULL) -#' -#' @param x.center (integer) Center expression value for heatmap coloring. -#' -#' @param x.range (integer) Values for minimum and maximum thresholds for heatmap coloring. -#' -#' @export -#' - -ngchm <- function(infercnv_obj, - out_dir=".", - title="NGCHM", - gene_symbol=NULL, - path_to_shaidyMapGen=NULL, - x.range = NA, - x.center = NA) { - - if (!is.null(path_to_shaidyMapGen)) { - shaidy.path <- unlist(strsplit(path_to_shaidyMapGen, split = .Platform$file.sep)) - if (!file.exists(path_to_shaidyMapGen) || tail(shaidy.path, n = 1L) != "ShaidyMapGen.jar"){ - error_message <- paste("Cannot find the file ShaidyMapGen.jar using the parameter \"path_to_shaidyMapGen\".", - "Check that the correct pathway is being used.") - flog.error(error_message) - stop(error_message) + if (! 
no_plot) { + if (is.null(final_scale_limits)) { + final_scale_limits = "auto" } - } else { - path_to_shaidyMapGen <- Sys.getenv("SHAIDYMAPGEN") - if (!file.exists(path_to_shaidyMapGen)){ ## check if envionrmental variable is passed - error_message <- paste("Cannot find the file ShaidyMapGen.jar using SHAIDYMAPGEN.", - "Check that the correct pathway is being used.") - flog.error(error_message) - stop(error_message) + if (is.null(final_center_val)) { + final_center_val = 1 } + + + flog.info("\n\n## Making the final infercnv heatmap ##") + plot_cnv(infercnv_obj, + k_obs_groups=k_obs_groups, + cluster_by_groups=cluster_by_groups, + out_dir=out_dir, + x.center=final_center_val, + x.range=final_scale_limits, + title="inferCNV", + output_filename="infercnv", + write_expr_matrix=TRUE, + png_res=png_res) } - if (!requireNamespace("NGCHM", quietly=TRUE)) { - stop("The \"NGCHM\" library is required to use \"-ngchm=TRUE\" but it is not available.", .call=FALSE) - } + return(infercnv_obj) - flog.info("Creating NGCHM as infercnv.ngchm") - Create_NGCHM(infercnv_obj = infercnv_obj, - path_to_shaidyMapGen = path_to_shaidyMapGen, - out_dir = out_dir, - title = title, - gene_symbol = gene_symbol, - x.range = x.range, - x.center = x.center) } - #' Subtracting the mean of the reference expr distributions from the observed cells. #' #' @title subtract_ref_expr_from_obs() @@ -809,35 +1204,49 @@ ngchm <- function(infercnv_obj, #' #' @param inv_log mean values will be determined based on (2^x -1) #' +#' @param use_bounds if multiple normal data sets are used, it takes the bounds of the means from each set for subtraction. +#' Alternatively, will use the mean( mean(normal) for each normal) default: TRUE +#' #' @return infercnv_obj containing the reference subtracted values. 
#' #' @export #' -subtract_ref_expr_from_obs <- function(infercnv_obj, inv_log=FALSE) { - # r = genes, c = cells - flog.info(paste("::subtract_ref_expr_from_obs:Start", sep="")) - - ref_groups = infercnv_obj@reference_grouped_cell_indices +subtract_ref_expr_from_obs <- function(infercnv_obj, inv_log=FALSE, use_bounds=TRUE) { + ## r = genes, c = cells + flog.info(sprintf("::subtract_ref_expr_from_obs:Start inv_log=%s, use_bounds=%s", inv_log, use_bounds)) - subtr_data <- .subtract_expr(infercnv_obj@expr.data, ref_groups, inv_log) - colnames(subtr_data) <- colnames(infercnv_obj@expr.data) - - infercnv_obj@expr.data <- subtr_data + if (has_reference_cells(infercnv_obj)) { + ref_groups = infercnv_obj@reference_grouped_cell_indices + flog.info("subtracting mean(normal) per gene per cell across all data") + } else { + ref_groups = list('proxyNormal' = unlist(infercnv_obj@observation_grouped_cell_indices)) + flog.info("-no reference cells specified... using mean of all cells as proxy") + } + + ref_grp_gene_means <- .get_normal_gene_mean_bounds(infercnv_obj@expr.data, ref_groups, inv_log=inv_log) + + infercnv_obj@expr.data <- .subtract_expr(infercnv_obj@expr.data, ref_grp_gene_means, use_bounds=use_bounds) + + if (! 
is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- subtract_ref_expr_from_obs(infercnv_obj@.hspike, inv_log=inv_log, use_bounds=use_bounds) + } - flog.info("subtracting mean(normal) per gene per cell across all data") - return(infercnv_obj) - + } -.subtract_expr <- function(expr_matrix, ref_groups, inv_log=FALSE) { +#' @keywords internal +#' @noRd +#' + +.get_normal_gene_mean_bounds <- function(expr.data, ref_groups, inv_log=FALSE) { - subtract_normal_expr_fun <- function(x) { - - grp_min = NA - grp_max = NA + get_indiv_gene_group_means_bounds_fun <- function(x) { + + grp_means = c() for (ref_group in ref_groups) { @@ -847,29 +1256,74 @@ subtract_ref_expr_from_obs <- function(infercnv_obj, inv_log=FALSE) { ref_grp_mean = mean(x[ref_group]) } - grp_min = min(grp_min, ref_grp_mean, na.rm=T) - grp_max = max(grp_max, ref_grp_mean, na.rm=T) + grp_means = c(grp_means, ref_grp_mean) } - row_init = rep(0, length(x)) + names(grp_means) <- names(ref_groups) + + return(as.data.frame(t(data.frame(grp_means)))) + } + + gene_ref_grp_means <- do.call(rbind, apply(expr.data, 1, get_indiv_gene_group_means_bounds_fun)) + + rownames(gene_ref_grp_means) <- rownames(expr.data) + + return(gene_ref_grp_means) +} + + +#' @keywords internal +#' @noRd +#' + +.subtract_expr <- function(expr_matrix, ref_grp_gene_means, use_bounds=FALSE) { + + my.rownames = rownames(expr_matrix) + my.colnames = colnames(expr_matrix) + + flog.info(sprintf("-subtracting expr per gene, use_bounds=%s", use_bounds)) + + subtract_normal_expr_fun <- function(row_idx) { + + gene_means <- as.numeric(ref_grp_gene_means[row_idx, , drop=TRUE]) - above_max = which(x>grp_max) - below_min = which(xgrp_max) + below_min = which(x0) && @@ -946,10 +1410,10 @@ create_sep_list <- function(row_count, sepList[[1]] <- list() sepList[[1]][[1]] <- c(0,0,0,0) } - - # Row data - # This is measured from bottom to top - # So you have to adjust the values of the data + + # Row data + # This is 
measured from bottom to top + # So you have to adjust the values of the data row_seps <- row_count-row_seps if(!is.null(row_seps) && !is.na(row_seps) && @@ -969,10 +1433,10 @@ create_sep_list <- function(row_count, #' @title split_references() -#' +#' #' @description Split up reference observations in to k groups based on hierarchical clustering. #' -#' +#' #' @param infercnv_obj infercnv_object #' #' @param num_groups (default: 2) @@ -980,15 +1444,15 @@ create_sep_list <- function(row_count, #' @param hclust_method clustering method to use (default: 'complete') #' #' @return infercnv_obj -#' +#' #' @export #' split_references <- function(infercnv_obj, num_groups=2, hclust_method='complete') { - - + + flog.info(paste("::split_references:Start", sep="")) ref_expr_matrix = infercnv_obj@expr.data[ , get_reference_grouped_cell_indices(infercnv_obj) ] @@ -996,9 +1460,9 @@ split_references <- function(infercnv_obj, hc <- hclust(dist(t(ref_expr_matrix)), method=hclust_method) split_groups <- cutree(hc, k=num_groups) - + ref_groups <- list() - + grp_counter = 0 for (cut_group in unique(split_groups)) { grp_counter = grp_counter + 1 @@ -1014,7 +1478,7 @@ split_references <- function(infercnv_obj, #' @title remove_outliers_norm() -#' +#' #' @description Set outliers to some upper or lower bound. #' #' @param infercnv_obj infercnv_object @@ -1027,7 +1491,7 @@ split_references <- function(infercnv_obj, #' @param upper_bound setting the upper bound for the data (default: NA, uses out_method above) #' #' @return infercnv_obj with data bounds set accordingly. 
-#' +#' #' @export #' @@ -1035,22 +1499,30 @@ remove_outliers_norm <- function(infercnv_obj, out_method="average_bound", lower_bound=NA, upper_bound=NA) { - + flog.info(paste("::remove_outlier_norm:Start", "out_method:", out_method, "lower_bound:" , lower_bound, "upper_bound:", upper_bound)) - - + + infercnv_obj@expr.data <- .remove_outliers_norm(data=infercnv_obj@expr.data, out_method=out_method, lower_bound=lower_bound, upper_bound=upper_bound) + if (! is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- remove_outliers_norm(infercnv_obj@.hspike, out_method, lower_bound, upper_bound) + } + return(infercnv_obj) } +#' @keywords internal +#' @noRd +#' .remove_outliers_norm <- function(data, out_method="average_bound", @@ -1061,9 +1533,9 @@ remove_outliers_norm <- function(infercnv_obj, "out_method:", out_method, "lower_bound:" , lower_bound, "upper_bound:", upper_bound)) - - + + if(is.null(data) || nrow(data) < 1 || ncol(data) < 1){ flog.error("::remove_outlier_norm: Error, something is wrong with the data, either null or no rows or columns") stop("Error, something is wrong with the data, either null or no rows or columns") @@ -1080,21 +1552,21 @@ remove_outliers_norm <- function(infercnv_obj, } else if (! is.na(out_method)) { - # using out_method instead of specified bounds. + # using out_method instead of specified bounds. 
flog.info(paste("::remove_outlier_norm using method:", out_method, "for defining outliers.")) - + if (out_method == "average_bound"){ - + bounds = .get_average_bounds(data) lower_bound = bounds[1] upper_bound = bounds[2] - + flog.info(sprintf("outlier bounds defined between: %g - %g", lower_bound, upper_bound)) } else { flog.error(paste("::remove_outlier_norm:Error, please", - "provide an approved method for outlier", - "removal for visualization.")) + "provide an approved method for outlier", + "removal for visualization.")) stop(991) } } else { @@ -1102,11 +1574,11 @@ remove_outliers_norm <- function(infercnv_obj, stop(992) } - # apply bounds + # apply bounds data[data < lower_bound] <- lower_bound data[data > upper_bound] <- upper_bound - - + + return(data) } @@ -1123,30 +1595,41 @@ remove_outliers_norm <- function(infercnv_obj, #' @param method method to select the center of the cell expression value. (default: 'mean', options: 'mean,'median') #' #' @return infercnv_object -#' +#' #' @export #' center_cell_expr_across_chromosome <- function(infercnv_obj, method="mean") { # or median - + flog.info(paste("::center_smooth across chromosomes per cell")) infercnv_obj@expr.data <- .center_columns(infercnv_obj@expr.data, method) + + if (! 
is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- center_cell_expr_across_chromosome(infercnv_obj@.hspike, method) + } + + return(infercnv_obj) } -.center_columns <- function(expr_data, method) { +#' @keywords internal +#' @noRd +#' - # Center within columns (cells) +.center_columns <- function(expr_data, method) { + + # Center within columns (cells) if (method == "median") { - row_median <- apply(expr_data, 2, function(x) { median(x, na.rm=T) } ) + row_median <- apply(expr_data, 2, function(x) { median(x, na.rm=TRUE) } ) expr_data <- t(apply(expr_data, 1, "-", row_median)) } else { - # by mean - row_means <- apply(expr_data, 2, function(x) { mean(x, na.rm=T) } ) + # by mean + row_means <- apply(expr_data, 2, function(x) { mean(x, na.rm=TRUE) } ) expr_data <- t(apply(expr_data, 1, "-", row_means)) } @@ -1165,22 +1648,22 @@ center_cell_expr_across_chromosome <- function(infercnv_obj, method="mean") { # #' @param min_mean_expr_cutoff the minimum mean value allowed for a gene to be retained in the expression matrix. #' #' @return infercnv_obj the infercnv_object with lowly or unexpressed genes removed. 
-#' +#' #' @export #' require_above_min_mean_expr_cutoff <- function(infercnv_obj, min_mean_expr_cutoff) { - + flog.info(paste("::above_min_mean_expr_cutoff:Start", sep="")) - - + + indices <-.below_min_mean_expr_cutoff(infercnv_obj@expr.data, min_mean_expr_cutoff) if (length(indices) > 0) { flog.info(sprintf("Removing %d genes from matrix as below mean expr threshold: %g", length(indices), min_mean_expr_cutoff)) infercnv_obj <- remove_genes(infercnv_obj, indices) - + expr_dim = dim(infercnv_obj@expr.data) flog.info(sprintf("There are %d genes and %d cells remaining in the expr matrix.", expr_dim[1], expr_dim[2])) @@ -1188,21 +1671,24 @@ require_above_min_mean_expr_cutoff <- function(infercnv_obj, min_mean_expr_cutof } return(infercnv_obj) - + } +#' @keywords internal +#' @noRd +#' .below_min_mean_expr_cutoff <- function(expr_data, min_mean_expr) { - + average_gene <- rowMeans(expr_data) - - # Find averages above a certain threshold + + # Find averages above a certain threshold indices <- which(average_gene < min_mean_expr) - + return(indices) - -} +} + @@ -1211,28 +1697,28 @@ require_above_min_mean_expr_cutoff <- function(infercnv_obj, min_mean_expr_cutof #' @description Filters out genes that have fewer than specified number of cells expressing them. #' #' @param infercnv_obj infercnv_object -#' +#' #' @param min_cells_per_gene int indicating number of cells required per gene for both obs and ref data #' #' @return infercnv_obj infercnv_object with corresponding genes removed. -#' +#' #' @export #' require_above_min_cells_ref <- function(infercnv_obj, min_cells_per_gene) { genes_passed = which(apply(infercnv_obj@expr.data, 1, function(x) { sum(x>0 & ! 
is.na(x)) >= min_cells_per_gene})) - + num_genes_total = dim(infercnv_obj@expr.data)[1] num_removed = num_genes_total - length(genes_passed) if (num_removed > 0) { - + flog.info(sprintf("Removed %d genes having fewer than %d min cells per gene = %g %% genes removed here", num_removed, min_cells_per_gene, num_removed / num_genes_total * 100)) if (num_removed == num_genes_total) { - + flog.warn(paste("::All genes removed! Must revisit your data..., cannot continue here.")) stop(998) } @@ -1240,10 +1726,10 @@ require_above_min_cells_ref <- function(infercnv_obj, min_cells_per_gene) { infercnv_obj <- remove_genes(infercnv_obj, -1 * genes_passed) - + } else { - + flog.info("no genes removed due to min cells/gene filter") } @@ -1256,34 +1742,55 @@ require_above_min_cells_ref <- function(infercnv_obj, min_cells_per_gene) { #' @title clear_noise() #' #' @description Remove values that are too close to the reference cell expr average and are considered noise. -#' +#' #' @param infercnv_obj infercnv_object #' -#' @param threshold values within reference mean +- threshold are set to zero. +#' @param threshold values within reference mean +- threshold are set to zero. +#' +#' @param noise_logistic uses a logistic (sigmoidal) function to noise removal. #' -#' @return infercnv_obj +#' @return infercnv_obj #' #' @export #' -clear_noise <- function(infercnv_obj, threshold) { +clear_noise <- function(infercnv_obj, threshold, noise_logistic=FALSE) { flog.info(paste("********* ::clear_noise:Start. 
threshold: ", threshold, sep="")) - + if (threshold == 0) { return(infercnv_obj); # nothing to do } - - ref_idx = get_reference_grouped_cell_indices(infercnv_obj) - vals = infercnv_obj@expr.data[,ref_idx] - mean_ref_vals = mean(vals) - - infercnv_obj@expr.data <- .clear_noise(infercnv_obj@expr.data, threshold, center_pos=mean_ref_vals) - + if (has_reference_cells(infercnv_obj)) { + ref_idx = get_reference_grouped_cell_indices(infercnv_obj) + mean_ref_vals = mean(infercnv_obj@expr.data[,ref_idx]) + } else { + ## no reference + ## use mean of all data + mean_ref_vals = mean(infercnv_obj@expr.data) + } + + if (noise_logistic) { + + infercnv_obj <- depress_log_signal_midpt_val(infercnv_obj, mean_ref_vals, threshold) + + } else { + + infercnv_obj@expr.data <- .clear_noise(infercnv_obj@expr.data, threshold, center_pos=mean_ref_vals) + } + + if (! is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- clear_noise(infercnv_obj@.hspike, threshold, noise_logistic) + } + return(infercnv_obj) } +#' @keywords internal +#' @noRd +#' .clear_noise <- function(expr_data, threshold, center_pos=0) { @@ -1291,7 +1798,7 @@ clear_noise <- function(infercnv_obj, threshold) { lower_bound = center_pos - threshold expr_data[expr_data > lower_bound & expr_data < upper_bound] = center_pos - + return(expr_data) } @@ -1308,31 +1815,53 @@ clear_noise <- function(infercnv_obj, threshold) { #' #' @param sd_amplifier multiplicative factor applied to the standard deviation to alter the noise #' range (default: 1.5) -#' +#' +#' @param noise_logistic uses a logistic (sigmoidal) function to noise removal. 
+#' #' @export #' -clear_noise_via_ref_mean_sd <- function(infercnv_obj, sd_amplifier=1.5) { - - ref_idx = get_reference_grouped_cell_indices(infercnv_obj) +clear_noise_via_ref_mean_sd <- function(infercnv_obj, sd_amplifier=1.5, noise_logistic=FALSE) { + + if (has_reference_cells(infercnv_obj)) { + ref_idx = get_reference_grouped_cell_indices(infercnv_obj) + flog.info("denoising using mean(normal) +- sd_amplifier * sd(normal) per gene per cell across all data") + } + else { + ref_idx = unlist(infercnv_obj@observation_grouped_cell_indices) + flog.info("-no reference cells specified... using mean and sd of all cells as proxy for denoising") + } vals = infercnv_obj@expr.data[,ref_idx] mean_ref_vals = mean(vals) - mean_ref_sd <- mean(apply(vals, 2, function(x) sd(x, na.rm=T))) * sd_amplifier - + mean_ref_sd <- mean(apply(vals, 2, function(x) sd(x, na.rm=TRUE))) * sd_amplifier + upper_bound = mean_ref_vals + mean_ref_sd lower_bound = mean_ref_vals - mean_ref_sd flog.info(paste(":: **** clear_noise_via_ref_quantiles **** : removing noise between bounds: ", - lower_bound, "-", upper_bound, sep=" ")) - - - smooth_matrix <- infercnv_obj@expr.data + lower_bound, "-", upper_bound, sep=" ")) + + + + if (noise_logistic) { + + threshold = mean_ref_sd + infercnv_obj <- depress_log_signal_midpt_val(infercnv_obj, mean_ref_vals, threshold) + + } else { + smooth_matrix <- infercnv_obj@expr.data + + smooth_matrix[smooth_matrix > lower_bound & smooth_matrix < upper_bound] = mean_ref_vals - smooth_matrix[smooth_matrix > lower_bound & smooth_matrix < upper_bound] = mean_ref_vals + infercnv_obj@expr.data <- smooth_matrix + } - infercnv_obj@expr.data <- smooth_matrix + if (! is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- clear_noise_via_ref_mean_sd(infercnv_obj@.hspike, sd_amplifier, noise_logistic) + } return(infercnv_obj) } @@ -1353,15 +1882,21 @@ clear_noise_via_ref_mean_sd <- function(infercnv_obj, sd_amplifier=1.5) { # chr indices. 
# Returns: # Indices to remove. -.remove_tails <- function(smooth_matrix, chr, tail_length){ - #flog.info(paste("::remove_tails:Start.", sep="")) +#' @keywords internal +#' @noRd +#' + + +.remove_tails <- function(smooth_matrix, chr, tail_length){ + + #flog.info(paste("::remove_tails:Start.", sep="")) chr_length <- length(chr) if ((tail_length < 3) || (chr_length < 3)){ return(c()) } if (chr_length < (tail_length * 2)){ - tail_length <- floor(chr_length / 3) + tail_length <- floor(chr_length / 3) } remove_indices <- chr[1:tail_length] remove_indices <- c(remove_indices, @@ -1373,7 +1908,7 @@ clear_noise_via_ref_mean_sd <- function(infercnv_obj, sd_amplifier=1.5) { #' @title smooth_by_chromosome() -#' +#' #' @description Smooth expression values for each cell across each chromosome by using a #' moving average with a window of specified length. #' @@ -1384,7 +1919,7 @@ clear_noise_via_ref_mean_sd <- function(infercnv_obj, sd_amplifier=1.5) { #' @param smooth_ends perform smoothing at the ends of the chromosomes (default: TRUE) #' #' @return infercnv_obj -#' +#' #' @export #' @@ -1392,16 +1927,13 @@ smooth_by_chromosome <- function(infercnv_obj, window_length, smooth_ends=TRUE) gene_chr_listing = infercnv_obj@gene_order[[C_CHR]] chrs = unlist(unique(gene_chr_listing)) - + for (chr in chrs) { chr_genes_indices = which(gene_chr_listing == chr) - flog.debug(paste0("smooth_by_chromosome: chr: ",chr)) + flog.info(paste0("smooth_by_chromosome: chr: ",chr)) - input_data = data=infercnv_obj@expr.data[chr_genes_indices, , drop=F] - flog.debug(paste0("dim subset:", paste(dim(input_data), collapse=","))) + chr_data=infercnv_obj@expr.data[chr_genes_indices, , drop=FALSE] - chr_data=infercnv_obj@expr.data[chr_genes_indices, , drop=F] - if (nrow(chr_data) > 1) { smoothed_chr_data = .smooth_window(data=chr_data, window_length=window_length) @@ -1412,34 +1944,44 @@ smooth_by_chromosome <- function(infercnv_obj, window_length, smooth_ends=TRUE) } } + if (! 
is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- smooth_by_chromosome(infercnv_obj@.hspike, window_length, smooth_ends) + } + + return(infercnv_obj) } +#' @keywords internal +#' @noRd +#' .smooth_window <- function(data, window_length) { - - flog.info(paste("::smooth_window:Start.", sep="")) + + flog.debug(paste("::smooth_window:Start.", sep="")) if (window_length < 2){ flog.warn("window length < 2, returning original unmodified data") return(data) } - - num_genes <- nrow(data) - flog.debug(paste("::smooth_window: dim data_sm: ", dim(data_sm), sep=" ")) - # Fix ends that couldn't be smoothed since not spanned by win/2 at ends. - # data_sm <- apply(data_sm, + ## Fix ends that couldn't be smoothed since not spanned by win/2 at ends. + data_sm <- apply(data, 2, .smooth_helper, window_length=window_length) -# tail_length=tail_length) - # Set back row and column names + + ## Set back row and column names row.names(data_sm) <- row.names(data) colnames(data_sm) <- colnames(data) + + flog.debug(paste("::smooth_window: dim data_sm: ", dim(data_sm), sep=" ")) + + return(data_sm) } @@ -1454,54 +1996,58 @@ smooth_by_chromosome <- function(infercnv_obj, window_length, smooth_ends=TRUE) # Data smoothed. ##.smooth_helper <- function(obs_data, tail_length) { +#' @keywords internal +#' @noRd +#' + .smooth_helper <- function(obs_data, window_length) { - # strip NAs out and replace after smoothing + # strip NAs out and replace after smoothing orig_obs_data = obs_data - + nas = is.na(obs_data) - + obs_data = obs_data[!nas] - + obs_length <- length(obs_data) end_data <- obs_data - + tail_length = (window_length - 1)/2 if (obs_length >= window_length) { end_data <- .smooth_center_helper(obs_data, window_length) } - # end_data will have the end positions replaced with mean values, smoothing just at the ends. - + # end_data will have the end positions replaced with mean values, smoothing just at the ends. 
+ obs_count <- length(obs_data) - + numerator_counts_vector = c(c(1:tail_length), tail_length + 1, c(tail_length:1)) - # defining the iteration range in cases where the window size is larger than the number of genes. In that case we only iterate to the half since the process is applied from both ends. + # defining the iteration range in cases where the window size is larger than the number of genes. In that case we only iterate to the half since the process is applied from both ends. iteration_range = ifelse(obs_count > window_length, tail_length, ceiling(obs_count/2)) - + for (tail_end in 1:iteration_range) { end_tail = obs_count - tail_end + 1 - + d_left = tail_end - 1 d_right = obs_count - tail_end d_right = ifelse(d_right > tail_length, tail_length, d_right) - + r_left = tail_length - d_left r_right = tail_length - d_right - + denominator = (((window_length - 1)/2)^2 + window_length) - ((r_left * (r_left + 1))/2) - ((r_right * (r_right + 1))/2) - + left_input_vector_chunk = obs_data[1:(tail_end + d_right)] right_input_vector_chunk = obs_data[(end_tail - d_right):obs_length] - + numerator_range = numerator_counts_vector[(tail_length + 1 - d_left):(tail_length + 1 + d_right)] - + end_data[tail_end] = sum(left_input_vector_chunk * numerator_range)/denominator end_data[end_tail] = sum(right_input_vector_chunk * rev(numerator_range))/denominator } - + orig_obs_data[! nas] = end_data # replace original data with end-smoothed data - + return(orig_obs_data) } @@ -1514,30 +2060,80 @@ smooth_by_chromosome <- function(infercnv_obj, window_length, smooth_ends=TRUE) # # Returns: # Vector of values smoothed with a moving average. -.smooth_center_helper <- function(obs_data, window_length){ +#' @keywords internal +#' @noRd +#' + +.smooth_center_helper <- function(obs_data, window_length){ + nas = is.na(obs_data) vals = obs_data[! 
nas] - + custom_filter_denominator = ((window_length-1)/2)^2 + window_length custom_filter_numerator = c(c(1:((window_length-1)/2)), ((window_length-1)/2)+1, c(((window_length-1)/2):1)) - + custom_filter = custom_filter_numerator/rep(custom_filter_denominator, window_length) - -# flog.info(paste("custom filter = ", custom_filter, "\n and window_length =", window_length, "\n and nrow(data) = ", nrow(obs_data), sep="")) - - #smoothed = filter(vals, rep(1 / window_length, window_length), sides=2) + + # flog.info(paste("custom filter = ", custom_filter, "\n and window_length =", window_length, "\n and nrow(data) = ", nrow(obs_data), sep="")) + + #smoothed = filter(vals, rep(1 / window_length, window_length), sides=2) smoothed = filter(vals, custom_filter, sides=2) - + ind = which(! is.na(smoothed)) vals[ind] = smoothed[ind] - + obs_data[! nas] = vals - + return(obs_data) } +#' @title smooth_by_chromosome_runmeans +#' +#' @description uses the simpler caTools:runmeans() to perform smoothing operations. +#' +#' @param infercnv_obj infercnv object +#' +#' @param window_length window length to use for smoothing. +#' +#' @return infercnv_obj +#' +#' @export + + +smooth_by_chromosome_runmeans <- function(infercnv_obj, window_length) { + + gene_chr_listing = infercnv_obj@gene_order[[C_CHR]] + chrs = unlist(unique(gene_chr_listing)) + + for (chr in chrs) { + chr_genes_indices = which(gene_chr_listing == chr) + flog.info(paste0("smooth_by_chromosome: chr: ",chr)) + + chr_data=infercnv_obj@expr.data[chr_genes_indices, , drop=FALSE] + + if (nrow(chr_data) > 1) { + chr_data = apply(chr_data, 2, caTools::runmean, k=window_length) + + infercnv_obj@expr.data[chr_genes_indices, ] <- chr_data + } + } + + if (! 
is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- smooth_by_chromosome_runmeans(infercnv_obj@.hspike, window_length) + } + + + return(infercnv_obj) +} + + + + + + #' @title get_average_bounds() #' #' @description Computes the mean of the upper and lower bound for the data across all cells. @@ -1545,7 +2141,7 @@ smooth_by_chromosome <- function(infercnv_obj, window_length, smooth_ends=TRUE) #' @param infercnv_obj infercnv_object #' #' @return (lower_bound, upper_bound) -#' +#' #' @export #' @@ -1555,6 +2151,11 @@ get_average_bounds <- function (infercnv_obj) { } + +#' @keywords internal +#' @noRd +#' + .get_average_bounds <- function(expr_matrix) { lower_bound <- mean(apply(expr_matrix, 2, @@ -1572,18 +2173,23 @@ get_average_bounds <- function (infercnv_obj) { #' @param infercnv_obj infercnv_object #' #' @return infercnv_obj -#' +#' #' @export #' log2xplus1 <- function(infercnv_obj) { - + flog.info("transforming log2xplus1()") infercnv_obj@expr.data <- log2(infercnv_obj@expr.data + 1) - + + if (! is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- log2xplus1(infercnv_obj@.hspike) + } + return(infercnv_obj) - + } @@ -1596,16 +2202,21 @@ log2xplus1 <- function(infercnv_obj) { #' @param infercnv_obj infercnv_object #' #' @return infercnv_obj -#' +#' #' @export #' invert_log2xplus1 <- function(infercnv_obj) { - + flog.info("inverting log2xplus1()") infercnv_obj@expr.data <- 2^infercnv_obj@expr.data - 1 - + + if (! is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- invert_log2xplus1(infercnv_obj@.hspike) + } + return(infercnv_obj) } @@ -1618,16 +2229,21 @@ invert_log2xplus1 <- function(infercnv_obj) { #' @param infercnv_obj infercnv_object #' #' @return infercnv_obj -#' +#' #' @export #' invert_log2 <- function(infercnv_obj) { - + flog.info("invert_log2(), computing 2^x") infercnv_obj@expr.data <- 2^infercnv_obj@expr.data - + + if (! 
is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- invert_log2(infercnv_obj@.hspike) + } + return(infercnv_obj) } @@ -1640,18 +2256,24 @@ invert_log2 <- function(infercnv_obj) { #' @param infercnv_obj infercnv_object #' #' @return infercnv_obj -#' +#' #' @export #' make_zero_NA <- function(infercnv_obj) { - + flog.info("make_zero_NA()") infercnv_obj@expr.data <- infercnv_obj@expr.data[infercnv_obj@expr.data == 0] <- NA + + if (! is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- make_zero_NA(infercnv_obj@.hspike) + } + return(infercnv_obj) - + } @@ -1665,36 +2287,40 @@ make_zero_NA <- function(infercnv_obj) { #' @param infercnv_obj infercnv_object #' #' @return infercnv_obj -#' +#' #' @export #' transform_to_reference_based_Zscores <- function(infercnv_obj) { - # center and convert to z-scores + ## center and convert to z-scores flog.info(paste("::center_and_Zscore_conversion", sep="")) - # remember, genes are rows, cells are cols - - # centering and z-scores based on the reference (normal) cells: + ## remember, genes are rows, cells are cols - # ref data represent the null distribution + ## centering and z-scores based on the reference (normal) cells: + + ## ref data represent the null distribution ref_idx = get_reference_grouped_cell_indices(infercnv_obj) ref_data = infercnv_obj@expr.data[,ref_idx] - gene_ref_mean = apply(ref_data, 1, function(x) {mean(x, na.rm=T)}) - gene_ref_sd = apply(ref_data, 1, function(x) {sd(x, na.rm=T)}) - - # assume at least Poisson level variation + gene_ref_mean = apply(ref_data, 1, function(x) {mean(x, na.rm=TRUE)}) + gene_ref_sd = apply(ref_data, 1, function(x) {sd(x, na.rm=TRUE)}) + + ## assume at least Poisson level variation gene_ref_sd = pmax(gene_ref_sd, gene_ref_mean) - # center all genes at the ref (normal) center: + ## center all genes at the ref (normal) center: infercnv_obj@expr.data = sweep(infercnv_obj@expr.data, 1, gene_ref_mean, 
FUN="-") - # convert to z-scores based on the ref (normal) distribution + ## convert to z-scores based on the ref (normal) distribution infercnv_obj@expr.data = sweep(infercnv_obj@expr.data, 1, gene_ref_sd, FUN="/") # make all data z-scores based on the ref data distribution. + if (! is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- transform_to_reference_based_Zscores(infercnv_obj@.hspike) + } return(infercnv_obj) @@ -1708,16 +2334,21 @@ transform_to_reference_based_Zscores <- function(infercnv_obj) { #' @param infercnv_obj infercnv_object #' #' @return infercnv_obj -#' +#' #' @export #' mean_center_gene_expr <- function(infercnv_obj) { - + flog.info(paste("::centering", sep="")) - - infercnv_obj@expr.data <- sweep(infercnv_obj@expr.data, 1, rowMeans(infercnv_obj@expr.data, na.rm=T), FUN="-") - + + infercnv_obj@expr.data <- sweep(infercnv_obj@expr.data, 1, rowMeans(infercnv_obj@expr.data, na.rm=TRUE), FUN="-") + + if (! is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- mean_center_gene_expr(infercnv_obj@.hspike) + } + return(infercnv_obj) } @@ -1729,14 +2360,14 @@ mean_center_gene_expr <- function(infercnv_obj) { #' @param infercnv_obj infercnv_object #' #' @return vector of column indices -#' +#' #' @export #' get_reference_grouped_cell_indices <- function(infercnv_obj) { - + return( unlist(infercnv_obj@reference_grouped_cell_indices) ) - + } @@ -1748,16 +2379,23 @@ get_reference_grouped_cell_indices <- function(infercnv_obj) { #' #' @param threshold value to threshold the data #' +#' @return infercnv_obj +#' #' @export #' apply_max_threshold_bounds <- function(infercnv_obj, threshold) { - + flog.info(paste("::process_data:setting max centered expr, threshold set to: +/-: ", threshold)) - + infercnv_obj@expr.data[infercnv_obj@expr.data > threshold] <- threshold infercnv_obj@expr.data[infercnv_obj@expr.data < (-1 * threshold)] <- -1 * threshold - + + if (! 
is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- apply_max_threshold_bounds(infercnv_obj@.hspike, threshold) + } + return(infercnv_obj) } @@ -1765,36 +2403,38 @@ apply_max_threshold_bounds <- function(infercnv_obj, threshold) { #' @title remove_genes_at_ends_of_chromosomes() #' #' @description Removes genes that are within window_length/2 of the ends of each chromosome. -#' +#' #' @param infercnv_obj infercnv_object #' #' @param window_length length of the window to use. -#' +#' +#' @return infercnv_obj +#' #' @export #' remove_genes_at_ends_of_chromosomes <- function(infercnv_obj, window_length) { - + contig_tail = (window_length - 1) / 2 remove_indices <- c() gene_chr_listing = infercnv_obj@gene_order[[C_CHR]] chrs = unlist(unique(gene_chr_listing)) for (chr in chrs){ - #flog.info(paste("::process_data:Remove tail contig ",chr, ".", sep="")) + #flog.info(paste("::process_data:Remove tail contig ",chr, ".", sep="")) remove_chr <- .remove_tails(infercnv_obj@expr.data, which(gene_chr_listing == chr), contig_tail) - #flog.debug(paste("::process_data:Remove tail - removing indices for chr: ", chr, ", count: ", length(remove_chr), sep="")) - + #flog.debug(paste("::process_data:Remove tail - removing indices for chr: ", chr, ", count: ", length(remove_chr), sep="")) + remove_indices <- c(remove_indices, remove_chr) - + } if (length(remove_indices) > 0){ - + infercnv_obj = remove_genes(infercnv_obj, remove_indices) - + flog.info(paste("::process_data:Remove genes at chr ends, ", "new dimensions (r,c) = ", paste(dim(infercnv_obj@expr.data), collapse=","), @@ -1808,10 +2448,15 @@ remove_genes_at_ends_of_chromosomes <- function(infercnv_obj, window_length) { flog.error("No genes removed at chr ends.... something wrong here") stop(1234) } - - + + + if (! 
is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- remove_genes_at_ends_of_chromosomes(infercnv_obj@.hspike, window_length) + } + return(infercnv_obj) - + } @@ -1820,107 +2465,163 @@ remove_genes_at_ends_of_chromosomes <- function(infercnv_obj, window_length) { #' @description Normalizes count data by total sum scaling #' #' For single cell data, a typical normalization factor is 1e5, providing counts per 100k total counts. -#' If a normalization factor is not provided, one is estimated based on: -#' 10^round(log10(mean(column_sums))) -#' +#' If a normalization factor is not provided, the median lib size is used.: +#' #' @param infercnv_obj infercnv_object #' #' @param normalize_factor total counts to scale the normalization to (default: NA, computed as described above) -#' +#' +#' @return infercnv_obj +#' #' @export #' normalize_counts_by_seq_depth <- function(infercnv_obj, normalize_factor=NA) { - + data <- infercnv_obj@expr.data - cs = colSums(data) + normalized_data <- .normalize_data_matrix_by_seq_depth(data, normalize_factor) + + + infercnv_obj@expr.data <- normalized_data + + return(infercnv_obj) + +} + - # make fraction of total counts: +#' @keywords internal +#' @noRd +#' + +.normalize_data_matrix_by_seq_depth <- function(counts.matrix, normalize_factor=NA) { + + flog.info("normalizing counts matrix by depth") + + data <- counts.matrix + + cs = colSums(data) + + print(cs) + + ## make fraction of total counts: data <- sweep(data, STATS=cs, MARGIN=2, FUN="/") if (is.na(normalize_factor)) { + + normalize_factor = median(cs) - normalize_factor = .compute_normalization_factor_from_column_sums(cs) + flog.info(sprintf("Computed total sum normalization factor as median libsize: %f", normalize_factor)) - flog.info(sprintf("Computed total sum normalization factor as: %f", normalize_factor)) - } else { flog.info(sprintf("Using specified normalization factor: %f", normalize_factor)) } - + + if (is.na(normalize_factor)) { + 
stop("Error, normalize factor not estimated") + } + data <- data * normalize_factor + + return(data) + +} - infercnv_obj@expr.data <- data - return(infercnv_obj) - -} -#' @title compute_normalization_factor() +#' @title anscombe_transform() #' -#' @description computes norm factor as: -#' normalize_factor = 10^round(log10(mean(cs))) +#' @description Performs Anscombe's transformation: +#' y = 2 * sqrt(x + 3/8) +#' as per +#' https://en.wikipedia.org/wiki/Anscombe_transform #' #' @param infercnv_obj infercnv_object #' -#' @return normalization_factor +#' @return infercnv_obj #' #' @export #' -compute_normalization_factor <- function(infercnv_obj) { - - data <- infercnv_obj@expr.data +anscombe_transform <- function(infercnv_obj) { - cs = colSums(data) + infercnv_obj@expr.data <- 2 * sqrt(infercnv_obj@expr.data + 3/8) - normalize_factor = .compute_normalization_factor_from_column_sums(cs) - - return(normalize_factor) - -} + if (! is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- anscombe_transform(infercnv_obj@.hspike) + } + + return(infercnv_obj) +} + #' @keywords internal #' @noRd #' -.compute_normalization_factor_from_column_sums <- function(cs) { - - normalize_factor = 10^round(log10(mean(cs))) - - return(normalize_factor) +add_pseudocount <- function(infercnv_obj, pseudocount) { + + flog.info(sprintf("Adding pseudocount: %g", pseudocount)) + + infercnv_obj@expr.data = infercnv_obj@expr.data + pseudocount + + if (! 
is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- add_pseudocount(infercnv_obj@.hspike, pseudocount) + } + + return(infercnv_obj) } - -#' @title anscombe_transform() +#' @title scale_infercnv_expr #' -#' @description Performs Anscombe's transformation: -#' y = 2 * sqrt(x + 3/8) -#' as per -#' https://en.wikipedia.org/wiki/Anscombe_transform +#' @description performs scaling to expression values for each cell, +#' assigning all values to a standard normal centered at zero. #' -#' @param infercnv_obj infercnv_object +#' @param infercnv_obj infercnv object #' -#' @export +#' @return infercnv_obj #' +#' @export -anscombe_transform <- function(infercnv_obj) { - - infercnv_obj@expr.data <- 2 * sqrt(infercnv_obj@expr.data + 3/8) +scale_infercnv_expr <- function(infercnv_obj) { - return(infercnv_obj) + flog.info("-scaling expr data") + infercnv_obj@expr.data = t(scale(t(infercnv_obj@expr.data))) + if (! is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- scale_infercnv_expr(infercnv_obj@.hspike) + } + + return(infercnv_obj) } + + #' @keywords internal #' @noRd #' -add_pseudocount <- function(infercnv_obj, pseudocount) { - - flog.info(sprintf("Adding pseudocount: %g", pseudocount)) - - infercnv_obj@expr.data = infercnv_obj@expr.data + pseudocount +cross_cell_normalize <- function(infercnv_obj) { + + ## using upper quartile normalization + + flog.info("-cross cell normalization") + + upper_quart = apply(infercnv_obj@expr.data, 2, quantile, probs=0.75) + mean_upper_quart = mean(upper_quart) + infercnv_obj@expr.data = sweep(infercnv_obj@expr.data, 2, mean_upper_quart/upper_quart, "*") + + + if (! 
is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- cross_cell_normalize(infercnv_obj@.hspike) + } + return(infercnv_obj) + + } + diff --git a/R/inferCNV_simple_sim.R b/R/inferCNV_simple_sim.R new file mode 100644 index 00000000..0f736718 --- /dev/null +++ b/R/inferCNV_simple_sim.R @@ -0,0 +1,315 @@ + +##' .get_simulated_cell_matrix() +##' +##' generates a simulated grouping of cells vs. genes based on a cell expression matrix and +##' the mean/variance relationship for all genes in all cell groupings. +##' +##' Cells are simulated as so: +##' The mean for genes in the normal cells are computed +##' A random expression value is chosen for each gene using a negative binomial distribution with specified common dispersion +##' +##' Genes are named according to the input expression matrix, and cells are named 'spike_{number}'. +##' +##' @param mean_var_table : a data.frame containing three columns: group_name, mean, variance of expression per gene per grouping. +##' +##' @param normal_cell_expr : expression matrix of normal cells to guide the simulation. Should be total sum normalized. +##' +##' @param num_cells : number of cells to simulate +##' +##' @param common_dispersion: in rnbinom(size=1/common_dispersion). Set to very small number to achieve Poisson. +##' +##' @return matrix containing simulated expression values. +##' +##' @keywords internal +##' @noRd +##' + +.get_simulated_cell_matrix <- function(gene_means, mean_p0_table, num_cells, common_dispersion) { + + # should be working on the total sum count normalized data. + # model the mean variance relationship + + ngenes = length(gene_means) + + dropout_logistic_params <- NULL + if (! is.null(mean_p0_table)) { + tryCatch ( + dropout_logistic_params <- .get_logistic_params(mean_p0_table), + error=function(x) { cat(sprintf("(%s), zero inflation couldn't be estimated from data. 
Using just neg binom now\n", x)) } + ) + } + + spike_cell_names = paste0('spike_cell_', 1:num_cells) + + sim_cell_matrix = matrix(rep(0,ngenes*num_cells), nrow=ngenes) + rownames(sim_cell_matrix) = names(gene_means) + colnames(sim_cell_matrix) = spike_cell_names + + sim_expr_vals <- function(gene_idx) { + m = gene_means[gene_idx] + return(.sim_expr_val(m, dropout_logistic_params, common_dispersion=common_dispersion)) + } + + for (i in 1:num_cells) { + newvals = sapply(1:ngenes, FUN=sim_expr_vals) + sim_cell_matrix[,i] = newvals + } + + return(sim_cell_matrix) +} + +##' @keywords internal +##' @noRd +##' + +.sim_expr_val <- function(m, dropout_logistic_params, common_dispersion=0.1, use_spline=TRUE) { + + # include drop-out prediction + + val = 0 + if (m > 0) { + + val = rnbinom(n=1, mu=m, size=1/common_dispersion) + + if ( (! is.null(dropout_logistic_params)) & val > 0) { + + if (use_spline) { + dropout_prob <- predict(dropout_logistic_params$spline, log(val))$y[1] + } else { + dropout_prob <- .logistic_midpt_slope(x=log(val), midpt=dropout_logistic_params$midpt, slope=dropout_logistic_params$slope) + } + if (runif(1) <= dropout_prob) { + ## a drop-out + val = 0 + } + } + } + + return(val) +} + + + +#' Computes probability of seeing a zero expr val as a function of the mean gene expression +#' The p(0 | mean_expr) is computed separately for each sample grouping. 
+#' +#' @keywords internal +#' @noRd +#' + +.get_mean_vs_p0_table <- function(infercnv_obj) { + + group_indices = c(infercnv_obj@observation_grouped_cell_indices, infercnv_obj@reference_grouped_cell_indices) + + + mean_vs_p0_table <- .get_mean_vs_p0_table_from_matrix(infercnv_obj@expr.data, group_indices) + + return(mean_vs_p0_table) + +} + +.get_mean_vs_p0_table_from_matrix <- function(expr.matrix, cell_groupings=NULL) { + + if (is.null(cell_groupings)) { + ## use all cells as single group + cell_groupings = list(allcells=seq(ncol(expr.matrix))) + } + + mean_p0_table = NULL + + for (group_name in names(cell_groupings)) { + #flog.info(sprintf("processing group: %s", group_name)) + expr.data = expr.matrix[, cell_groupings[[ group_name ]] ] + + group_mean_p0_table <- .get_mean_vs_p0_from_matrix(expr.data) + group_mean_p0_table[[ 'group_name' ]] <- group_name + + if (is.null(mean_p0_table)) { + mean_p0_table = group_mean_p0_table + } else { + mean_p0_table = rbind(mean_p0_table, group_mean_p0_table) + } + } + + return(mean_p0_table) +} + +#' Computes probability of seeing a zero expr val as a function of the mean gene expression +#' based on the input expression matrix. +#' +#' @keywords internal +#' @noRd +#' + +.get_mean_vs_p0_from_matrix <- function(expr.data) { + ncells = ncol(expr.data) + m = rowMeans(expr.data) + numZeros = apply(expr.data, 1, function(x) { sum(x==0) }) + + pZero = numZeros/ncells + + mean_p0_table = data.frame(m=m, p0=pZero) + + return(mean_p0_table) +} + + +#' +#' Logistic function +#' +#' InferCNV note: Standard function here, but lifted from +#' Splatter (Zappia, Phipson, and Oshlack, 2017) +#' https://genomebiology.biomedcentral.com/articles/10.1186/s13059-017-1305-0 +#' +#' Implementation of the logistic function +#' +#' @param x value to apply the function to. +#' @param x0 midpoint parameter. Gives the centre of the function. +#' @param k shape parameter. Gives the slope of the function. 
+#'
+#' @return Value of logistic function with given parameters
+#'
+#' @keywords internal
+#' @noRd
+#'
+.logistic_midpt_slope <- function(x, midpt, slope) {
+    1 / (1 + exp(-slope * (x - midpt)))
+}
+
+
+
+#' Given the mean, p0 table, fits the data to a logistic function to compute
+#' the shape of the logistic distribution.
+#'
+#' @param mean_p0_table data.frame with columns m (mean expression) and p0
+#'                      (fraction of zero values); rows with m <= 0 are dropped
+#'                      before fitting because log(m) is used as predictor.
+#'
+#' @return list with elements midpt and slope (nls estimates of the logistic fit
+#'         of p0 against log(m)) and spline (smooth.spline fit of the same data).
+#'
+#' @keywords internal
+#' @noRd
+#'
+
+.get_logistic_params <- function(mean_p0_table) {
+
+    mean_p0_table <- mean_p0_table[mean_p0_table$m > 0, ] # remove zeros, can't take log.
+
+    x <- log(mean_p0_table$m)
+    y <- mean_p0_table$p0
+
+    df <- data.frame(x,y)
+
+    # write.table(df, "_logistic_params", quote=FALSE, sep="\t") # debugging...
+
+    fit <- nls(y ~ .logistic_midpt_slope(x, midpt = x0, slope = k),
+               data = df,
+               start = list(x0 = mean(x), k = -1)) # borrowed/updated from splatter
+
+    estimates <- coef(fit)
+
+    list(midpt = estimates[["x0"]],
+         slope = estimates[["k"]],
+         ## also fit a spline
+         spline = smooth.spline(x, y))
+}
+
+
+
+#' Estimates the edgeR common dispersion of an expression matrix using an
+#' intercept-only design (one design row per column of expr.data).
+#'
+#' @param expr.data matrix handed to edgeR::estimateDisp(); presumably raw
+#'                  counts with genes as rows and cells as columns -- confirm
+#'                  against callers.
+#'
+#' @return the common.dispersion value reported by edgeR (also logged).
+#'
+#' @keywords internal
+#' @noRd
+#'
+
+.estimate_common_dispersion <- function(expr.data) {
+
+    ## estimate common disp from these data:
+    ## creds to splatter
+    design <- matrix(1, ncol(expr.data), 1)
+
+    disps <- edgeR::estimateDisp(expr.data, design = design)
+
+    common_dispersion <- disps$common.dispersion
+
+    flog.info(sprintf("-edgeR::estimateDisp() -> %g", common_dispersion))
+
+    return(common_dispersion)
+
+}
+
+
+
+#' Plots the empirical CDFs of two sets of values on a common grid and marks,
+#' with a vertical segment, the grid point where the two CDFs differ the most.
+#'
+#' @param title plot title.
+#' @param tumor_expr numeric vector, first distribution.
+#' @param hspike_expr numeric vector, second distribution.
+#' @param names optional character vector of length 2 with the labels to use for
+#'              the two curves; otherwise 'tumor_cdf' and 'hspike_cdf' are used.
+#'
+#' @return the value of plot() on the ggplot object; called for the plot itself.
+#'
+#' @noRd
+#'
+
+KS_plot <- function(title, tumor_expr, hspike_expr, names=NULL) {
+
+    tumor_ecdf <- ecdf(tumor_expr)
+    hspike_ecdf <- ecdf(hspike_expr)
+
+    ## evaluate both CDFs on one grid spanning the combined value range
+    val_range <- range(tumor_expr, hspike_expr)
+    step <- (val_range[2] - val_range[1])/100
+    vals <- seq(val_range[1], val_range[2], step)
+
+    cdfs <- data.frame(vals,
+                       tumor_cdf = tumor_ecdf(vals),
+                       hspike_cdf = hspike_ecdf(vals))
+
+    ## scalar condition: && short-circuits when names is NULL
+    if ( (! is.null(names)) && length(names) == 2) {
+        name1 <- names[1]
+        name2 <- names[2]
+        colnames(cdfs)[2:3] <- c(name1, name2)
+    } else {
+        name1 <- 'tumor_cdf'
+        name2 <- 'hspike_cdf'
+    }
+
+    ks_point <- which.max(abs(cdfs[,2] - cdfs[,3]))
+    ks_point_info <- cdfs[ks_point,]
+    ##message("KS point info: ", paste(ks_point_info, collapse=', '))
+
+    ## wide -> long (vals, type, cdf); built directly so the two curve columns
+    ## are picked by position instead of by symbols holding their names.
+    cdfs <- data.frame(vals = rep(cdfs[[1]], times=2),
+                       type = rep(c(name1, name2), each=nrow(cdfs)),
+                       cdf = c(cdfs[[2]], cdfs[[3]]),
+                       stringsAsFactors=FALSE)
+
+    ## x must be the column *name*: passing the numeric vector itself gives a
+    ## constant aesthetic whose length does not match the long table.
+    p <- ggplot(cdfs, aes_string(x='vals', y='cdf')) +
+        geom_line(aes_string(color='type', linetype='type')) +
+        geom_segment(aes(x=ks_point_info$vals,
+                         y=ks_point_info[[name1]],
+                         xend=ks_point_info$vals,
+                         yend=ks_point_info[[name2]]), color='magenta', size=2) +
+        ggtitle(title) + xlab("expr.val") + ylab("cdf")
+
+    plot(p)
+
+}
+
+
+
+#' Fits a logistic curve and a smoothing spline to p0 as a function of
+#' log(m + 1) and returns the fitted points together with both fit objects.
+#'
+#' @param mean_vs_p0_table data.frame with columns m (mean expression) and p0
+#'                         (fraction of zero values).
+#'
+#' @return list with named elements logistic_x, logistic_y, spline_x, spline_y,
+#'         spline.fit and logistic.fit.
+#'
+#' @keywords internal
+#' @noRd
+#'
+
+.mean_vs_p0_to_stats <- function(mean_vs_p0_table) {
+
+    logm <- log(mean_vs_p0_table$m + 1)
+    p0 <- mean_vs_p0_table$p0
+
+    ## starting value for the midpoint: median log-mean of genes with p0 in (0.2, 0.8)
+    x_approx_mid <- median(logm[which(p0>0.2 & p0 < 0.8)])
+
+    x <- logm
+    y <- p0
+    df <- data.frame(x,y)
+
+    ## NOTE(review): .logistic(x, x0, k) is not defined in this part of the file;
+    ## presumably the Splatter-derived helper -- confirm it is still available.
+    fit <- nls(y ~ .logistic(x, x0 = x0, k = k), data = df,
+               start = list(x0 = x_approx_mid, k = -1))
+
+    logistic_x <- x
+    ## newdata must carry the predictor by name
+    logistic_y <- predict(fit, newdata=df)
+
+    ## also try fitting a spline
+    spline.fit <- smooth.spline(x,y)
+    ## predict.smooth.spline() has no 'newdata' argument (it was silently
+    ## ignored); with x missing it returns the fit at its own x values.
+    spline.pts <- predict(spline.fit)
+
+    ## '=' (not '<-') so that every element of the result is named
+    ret <- list(logistic_x = logistic_x,
+                logistic_y = logistic_y,
+                spline_x = spline.pts$x,
+                spline_y = spline.pts$y,
+                spline.fit = spline.fit,
+                logistic.fit = fit)
+
+
+    return(ret)
+}
+
+
diff --git a/R/inferCNV_spike.R b/R/inferCNV_spike.R
deleted file mode 100644
index b50889c1..00000000
--- a/R/inferCNV_spike.R
+++ /dev/null
@@ -1,431 +0,0 @@
-
-#' @title spike_in_variation_chrs()
-#'
-#' Adds a 'SPIKE'-in to the observations set at different thresholds of loss/gain to
-#' aid in tracking the effect of infercnv operations and for defining the final scaling.
-#' -#' -#' @param infercnv_obj An infercnv object populated with raw count data -#' -#' @param spike_in_chrs : define the chromsomes that will serve as signal for gain/loss -#' default: picks chrosomes in order of size -#' -#' @param min_genes_per_chr : default 100 -#' -#' @param spike_in_multiplier_vec : factors that define relative expression for gain/loss -#' and must match ordering of spike_in_chrs above -#' default: c(0.01, 2.0) -#' -#' @param max_cells max number of cells to incorporate in the spike-in -#' -#' @export - -spike_in_variation_chrs <- function(infercnv_obj, - spike_in_chrs=NULL, - spike_in_multiplier_vec=c(0.01, 2.0), - max_cells=100, - min_genes_per_chr=100) { - - - if (is.null(spike_in_chrs)) { - num_chrs_want = length(spike_in_multiplier_vec) - spike_in_chrs = .select_longest_chrs(infercnv_obj, num_chrs_want) - flog.info(paste("Selecting longest chrs for adding spike:", paste(spike_in_chrs, collapse=","))) - } else { - flog.info(paste("Using specified chrs for adding spike:", paste(spike_in_chrs, collapse=","))) - } - - min_genes_selected = min_genes_per_chr - - ## get the gene ordering: - gene_selection_listing = list() - for (chr in spike_in_chrs) { - chr_genes = which(infercnv_obj@gene_order$chr == chr) - if (length(chr_genes) < min_genes_selected) { - flog.error(sprintf("Error, have %d genes found for chr %s, < min %d required", - length(chr_genes), - chr, - min_genes_selected)) - stop("error") - } - gene_selection_listing[[chr]] = chr_genes - } - - infercnv_obj <- .spike_in_variation_genes_via_modeling(infercnv_obj, gene_selection_listing, spike_in_multiplier_vec, max_cells=max_cells) - - return(infercnv_obj) -} - - -##' @title .spike_in_variation_genes_via_modeling() -##' -##' Creates the spike-in based on a list of genes. -##' -##' @param infercnv_obj An infercnv object populated with raw count data -##' -##' @param gene_selection_listing : list of [[chr]] = [1,2,3,4,...] 
corresponding to indices (rows) of genes, -##' and should match order of spike_in_multiplier_vec below. -##' -##' @param spike_in_multiplier_vec : vector of factors corresponding to gain/loss multipliers matching order in the gene list above. -##' -##' @param max_cells : max number of cells to include in the spike-in -##' -##' @keywords internal -##' @noRd -##' -.spike_in_variation_genes_via_modeling <- function(infercnv_obj, gene_selection_listing, spike_in_multiplier_vec, max_cells=max_cells) { - - mvtable = .get_mean_var_table(infercnv_obj) - normal_cells_idx = infercnv::get_reference_grouped_cell_indices(infercnv_obj) - normal_cells_expr = infercnv_obj@expr.data[,normal_cells_idx] - - # zeros are a problem here... - gene_means = rowMeans(normal_cells_expr) - - mean_p0_table = .get_mean_vs_p0_table(infercnv_obj) - - ## apply spike-in multiplier vec - for (i in 1:length(spike_in_multiplier_vec)) { - - gene_indices = gene_selection_listing[[i]] - multiplier = spike_in_multiplier_vec[i] - - gene_means[gene_indices] = gene_means[gene_indices] * multiplier - } - - ## get simulated matrix - sim_matrix = .get_simulated_cell_matrix(gene_means, mean_p0_table, max_cells) - - ## integrate into expr data and count data matrices - ncol_begin = ncol(infercnv_obj@expr.data) + 1 - ncol_end = ncol_begin + max_cells - 1 - - infercnv_obj@expr.data = cbind( infercnv_obj@expr.data, sim_matrix ) - infercnv_obj@count.data = cbind( infercnv_obj@count.data, sim_matrix ) # just so it validates... not useful, otherwise - - infercnv_obj@observation_grouped_cell_indices[['SPIKE']] = ncol_begin:ncol_end - - validate_infercnv_obj(infercnv_obj) - - - return(infercnv_obj) -} - - -##' .get_simulated_cell_matrix() -##' -##' generates a simulated grouping of cells vs. genes based on a cell expression matrix and -##' the mean/variance relationship for all genes in all cell groupings. 
-##' -##' Cells are simulated as so: -##' The mean for genes in the normal cells are computed -##' A random expression value is chosen for each gene using a negative binomial distribution with dispersion = 0.1 -##' -##' Genes are named according to the input expression matrix, and cells are named 'spike_{number}'. -##' -##' @param mean_var_table : a data.frame containing three columns: group_name, mean, variance of expression per gene per grouping. -##' -##' @param normal_cell_expr : expression matrix of normal cells to guide the simulation. Should be total sum normalized. -##' -##' @param num_cells : number of cells to simulate -##' -##' @return matrix containing simulated expression values. -##' -##' @keywords internal -##' @noRd -##' - -.get_simulated_cell_matrix <- function(gene_means, mean_p0_table, num_cells) { - - # should be working on the total sum count normalized data. - # model the mean variance relationship - - ngenes = length(gene_means) - - dropout_logistic_params <- .get_logistic_params(mean_p0_table) - - - spike_cell_names = paste0('spike_cell_', 1:num_cells) - - sim_cell_matrix = matrix(rep(0,ngenes*num_cells), nrow=ngenes) - rownames(sim_cell_matrix) = names(gene_means) - colnames(sim_cell_matrix) = spike_cell_names - - sim_expr_vals <- function(gene_idx) { - m = gene_means[gene_idx] - return(.sim_expr_val(m, dropout_logistic_params)) - } - - for (i in 1:num_cells) { - newvals = sapply(1:ngenes, FUN=sim_expr_vals) - sim_cell_matrix[,i] = newvals - } - - return(sim_cell_matrix) -} - -##' @keywords internal -##' @noRd -##' - -.sim_expr_val <- function(m, dropout_logistic_params) { - - # include drop-out prediction - - val = 0 - if (m > 0) { - dropout_prob <- .logistic(x=log(m), midpt=dropout_logistic_params$midpt, slope=dropout_logistic_params$slope) - - if (runif(1) > dropout_prob) { - # not a drop-out - val = rnbinom(n=1, mu=m, size=1/0.1) #fixed dispersion at 0.1 - } - } - return(val) -} - - - - -##' .get_mean_var_table() -##' -##' Computes the 
gene mean/variance table based on all defined cell groupings (reference and observations) -##' -##' @param infercnv_obj An infercnv object populated with raw count data -##' -##' @return data.frame with 3 columns: group_name, mean, variance -##' -##' -##' @keywords internal -##' @noRd -##' - -.get_mean_var_table <- function(infercnv_obj) { - - group_indices = c(infercnv_obj@observation_grouped_cell_indices, infercnv_obj@reference_grouped_cell_indices) - - mean_var_table = NULL - - for (group_name in names(group_indices)) { - flog.info(sprintf("processing group: %s", group_name)) - expr.data = infercnv_obj@expr.data[, group_indices[[ group_name ]] ] - m = rowMeans(expr.data) - v = apply(expr.data, 1, var) - if (is.null(mean_var_table)) { - mean_var_table = data.frame(g=group_name, m=m, v=v) - } else { - mean_var_table = rbind(mean_var_table, data.frame(g=group_name, m=m, v=v)) - } - } - - return(mean_var_table) -} - -##' .get_spike_in_average_bounds() -##' -##' return mean bounds for expression of all cells in the spike-in -##' -##' @param infercnv_obj An infercnv object populated with raw count data -##' -##' @return c(left_bound, right_bound) -##' -##' @keywords internal -##' @noRd -##' - - -.get_spike_in_average_bounds <- function(infercnv_obj) { - - spike_in_cell_idx = infercnv_obj@observation_grouped_cell_indices[[ 'SPIKE' ]] - spike.expr.data = infercnv_obj@expr.data[,spike_in_cell_idx] - - bounds = .get_average_bounds(spike.expr.data) - - return(bounds) -} - - -#' remove_spike() -#' -#' Removes the spiked-in group named 'SPIKE' from the infercnv_obj -#' -#' @param infercnv_obj An infercnv object populated with raw count data -#' -#' @return infercnv_obj -#' -#' @export - -remove_spike <- function(infercnv_obj) { - - flog.info("Removing spike") - - spike_in_cell_idx = infercnv_obj@observation_grouped_cell_indices[[ 'SPIKE' ]] - - infercnv_obj@expr.data = infercnv_obj@expr.data[, -spike_in_cell_idx] - - infercnv_obj@observation_grouped_cell_indices[[ 'SPIKE' ]] 
<- NULL # deletes it. - - return(infercnv_obj) - -} - - - -#' scale_cnv_by_spike() -#' -#' Scales expression data according to the expression value bounds in the SPIKE group. -#' -#' Assumes data is centered at 1 -#' Expression below 1 is scaled according to the left spike bound set to zero. -#' Expression above 1 is scaled according to the right spike bound set to two. -#' -#' @param infercnv_obj An infercnv object populated with raw count data -#' -#' @return infercnv_obj -#' -#' @export - - -scale_cnv_by_spike <- function(infercnv_obj) { - - # everything here should be centered at 1 (no change). - - spike_bounds = .get_spike_in_average_bounds(infercnv_obj) - - left_bound = spike_bounds[1] - right_bound = spike_bounds[2] - - # zero gets set to left bound - # right bound gets set to 2x - - scale_by_spike <- function(x) { - if (x < 1) { - x = 1 - ( (1-x)/(1-left_bound) ) - if (x < 0) { x = 0 } - } else if (x > 1) { - x = 1 + ( (x-1) / (right_bound - 1) ) - } - return(x) - } - - infercnv_obj@expr.data <- apply(infercnv_obj@expr.data, 1:2, scale_by_spike) - - return(infercnv_obj) -} - - -#' selects the specified number of chrs having the largest number of (expressed) genes -#' @keywords internal -#' @noRd -#' - -.select_longest_chrs <- function(infercnv_obj, num_chrs_want) { - - # get count of chrs - counts = infercnv_obj@gene_order %>% count(.data$chr, sort=TRUE) - - return(counts$chr[1:num_chrs_want]) - -} - -#' Computes probability of seeing a zero expr val as a function of the mean gene expression -#' The p(0 | mean_expr) is computed separately for each sample grouping. 
-#' -#' @keywords internal -#' @noRd -#' - -.get_mean_vs_p0_table <- function(infercnv_obj) { - - group_indices = c(infercnv_obj@observation_grouped_cell_indices, infercnv_obj@reference_grouped_cell_indices) - - mean_p0_table = NULL - - for (group_name in names(group_indices)) { - flog.info(sprintf("processing group: %s", group_name)) - expr.data = infercnv_obj@expr.data[, group_indices[[ group_name ]] ] - - group_mean_p0_table <- .get_mean_vs_p0_from_matrix(expr.data) - group_mean_p0_table[[ 'group_name' ]] <- group_name - - if (is.null(mean_p0_table)) { - mean_p0_table = group_mean_p0_table - } else { - mean_p0_table = rbind(mean_p0_table, group_mean_p0_table) - } - } - - return(mean_p0_table) -} - -#' Computes probability of seeing a zero expr val as a function of the mean gene expression -#' based on the input expression matrix. -#' -#' @keywords internal -#' @noRd -#' - -.get_mean_vs_p0_from_matrix <- function(expr.data) { - ncells = ncol(expr.data) - m = rowMeans(expr.data) - numZeros = apply(expr.data, 1, function(x) { sum(x==0) }) - - pZero = numZeros/ncells - - mean_p0_table = data.frame(m=m, p0=pZero) - - return(mean_p0_table) -} - - -#' -#' Logistic function -#' -#' InferCNV note: Standard function here, but lifted from -#' Splatter (Zappia, Phipson, and Oshlack, 2017) -#' https://genomebiology.biomedcentral.com/articles/10.1186/s13059-017-1305-0 -#' -#' Implementation of the logistic function -#' -#' @param x value to apply the function to. -#' @param x0 midpoint parameter. Gives the centre of the function. -#' @param k shape parameter. Gives the slope of the function. -#' -#' @return Value of logistic function with given parameters -#' -#' @keywords internal -#' @noRd -#' -.logistic <- function(x, midpt, slope) { - 1 / (1 + exp(-slope * (x - midpt))) -} - - - -#' Given the mean, p0 table, fits the data to a logistic function to compute -#' the shape of the logistic distribution. 
-#' -#' @keywords internal -#' @noRd -#' - -.get_logistic_params <- function(mean_p0_table) { - - mean_p0_table <- mean_p0_table[mean_p0_table$m > 0, ] # remove zeros, can't take log. - - x = log(mean_p0_table$m) - y = mean_p0_table$p0 - - df = data.frame(x,y) - - #write.table(df, "_logistic_params", quote=F, sep="\t") # debugging... - - fit <- nls(y ~ .logistic(x, midpt = x0, slope = k), data = df, start = list(x0 = mean(x), k = -1)) # borrowed/updated from splatter - - logistic_params = list() - - logistic_params[[ 'midpt' ]] <- summary(fit)$coefficients["x0", "Estimate"] - logistic_params[[ 'slope' ]] <- summary(fit)$coefficients["k", "Estimate"] - - return(logistic_params) -} diff --git a/R/inferCNV_tumor_subclusters.R b/R/inferCNV_tumor_subclusters.R new file mode 100644 index 00000000..170c0be0 --- /dev/null +++ b/R/inferCNV_tumor_subclusters.R @@ -0,0 +1,365 @@ + +define_signif_tumor_subclusters <- function(infercnv_obj, p_val, hclust_method, partition_method, restrict_to_DE_genes=FALSE) { + + flog.info(sprintf("define_signif_tumor_subclusters(p_val=%g", p_val)) + + tumor_groups <- infercnv_obj@observation_grouped_cell_indices + + res = list() + + normal_expr_data = infercnv_obj@expr.data[, unlist(infercnv_obj@reference_grouped_cell_indices) ] + + for (tumor_group in names(tumor_groups)) { + + flog.info(sprintf("define_signif_tumor_subclusters(), tumor: %s", tumor_group)) + + tumor_group_idx <- tumor_groups[[ tumor_group ]] + tumor_expr_data <- infercnv_obj@expr.data[,tumor_group_idx] + + if (restrict_to_DE_genes) { + p_vals <- .find_DE_stat_significance(normal_expr_data, tumor_expr_data) + + DE_gene_idx = which(p_vals < p_val) + tumor_expr_data = tumor_expr_data[DE_gene_idx, , drop=FALSE] + + } + + tumor_subcluster_info <- .single_tumor_subclustering(tumor_group, tumor_group_idx, tumor_expr_data, p_val, hclust_method, partition_method) + + res$hc[[tumor_group]] <- tumor_subcluster_info$hc + res$subclusters[[tumor_group]] <- tumor_subcluster_info$subclusters 
+ + } + + infercnv_obj@tumor_subclusters <- res + + + if (! is.null(infercnv_obj@.hspike)) { + flog.info("-mirroring for hspike") + infercnv_obj@.hspike <- define_signif_tumor_subclusters(infercnv_obj@.hspike, p_val, hclust_method, partition_method, restrict_to_DE_genes) + } + + + return(infercnv_obj) +} + + + +.single_tumor_subclustering <- function(tumor_name, tumor_group_idx, tumor_expr_data, p_val, hclust_method, + partition_method=c('qnorm', 'pheight', 'qgamma', 'shc', 'none') + ) { + + partition_method = match.arg(partition_method) + + tumor_subcluster_info = list() + + hc <- hclust(dist(t(tumor_expr_data)), method=hclust_method) + + tumor_subcluster_info$hc = hc + + heights = hc$height + + grps <- NULL + + if (partition_method == 'pheight') { + + cut_height = p_val * max(heights) + flog.info(sprintf("cut height based on p_val(%g) = %g and partition_method: %s", p_val, cut_height, partition_method)) + grps <- cutree(hc, h=cut_height) # will just be one cluster if height > max_height + + + } else if (partition_method == 'qnorm') { + + mu = mean(heights) + sigma = sd(heights) + + cut_height = qnorm(p=1-p_val, mean=mu, sd=sigma) + flog.info(sprintf("cut height based on p_val(%g) = %g and partition_method: %s", p_val, cut_height, partition_method)) + grps <- cutree(hc, h=cut_height) # will just be one cluster if height > max_height + + } else if (partition_method == 'qgamma') { + + # library(fitdistrplus) + gamma_fit = fitdist(heights, 'gamma') + shape = gamma_fit$estimate[1] + rate = gamma_fit$estimate[2] + cut_height=qgamma(p=1-p_val, shape=shape, rate=rate) + flog.info(sprintf("cut height based on p_val(%g) = %g and partition_method: %s", p_val, cut_height, partition_method)) + grps <- cutree(hc, h=cut_height) # will just be one cluster if height > max_height + + #} else if (partition_method == 'shc') { + # + # grps <- .get_shc_clusters(tumor_expr_data, hclust_method, p_val) + + } else if (partition_method == 'none') { + + grps <- cutree(hc, k=1) + + } else { 
+ stop("Error, not recognizing parition_method") + } + + # cluster_ids = unique(grps) + # flog.info(sprintf("cut tree into: %g groups", length(cluster_ids))) + + tumor_subcluster_info$subclusters = list() + + ordered_idx = tumor_group_idx[hc$order] + s = split(grps,grps) + flog.info(sprintf("cut tree into: %g groups", length(s))) + + start_idx = 1 + + # for (g in cluster_ids) { + for (g in names(s)) { + + split_subcluster = paste0(tumor_name, "_s", g) + flog.info(sprintf("-processing %s,%s", tumor_name, split_subcluster)) + + # subcluster_indices = tumor_group_idx[which(grps == g)] + end_idx = start_idx + length(s[[g]]) - 1 + subcluster_indices = tumor_group_idx[hc$order[start_idx:end_idx]] + start_idx = end_idx + 1 + + tumor_subcluster_info$subclusters[[ split_subcluster ]] = subcluster_indices + + } + + return(tumor_subcluster_info) +} + + +#.get_shc_clusters <- function(tumor_expr_data, hclust_method, p_val) { +# +# library(sigclust2) +# +# flog.info(sprintf("defining groups using shc, hclust_method: %s, p_val: %g", hclust_method, p_val)) +# +# shc_result = sigclust2::shc(t(tumor_expr_data), metric='euclidean', linkage=hclust_method, alpha=p_val) +# +# cluster_idx = which(shc_result$p_norm <= p_val) +# +# grps = rep(1, ncol(tumor_expr_data)) +# names(grps) <- colnames(tumor_expr_data) +# +# counter = 1 +# for (cluster_id in cluster_idx) { +# labelsA = unlist(shc_result$idx_hc[cluster_id,1]) +# +# labelsB = unlist(shc_result$idx_hc[cluster_id,2]) +# +# counter = counter + 1 +# grps[labelsB] <- counter +# } +# +# return(grps) +#} + + + + +.find_DE_stat_significance <- function(normal_matrix, tumor_matrix) { + + run_t_test<- function(idx) { + vals1 = unlist(normal_matrix[idx,,drop=TRUE]) + vals2 = unlist(tumor_matrix[idx,,drop=TRUE]) + + ## useful way of handling tests that may fail: + ## https://stat.ethz.ch/pipermail/r-help/2008-February/154167.html + + res = try(t.test(vals1, vals2), silent=TRUE) + + if (is(res, "try-error")) return(NA) else return(res$p.value) 
+ + } + + pvals = sapply(seq(nrow(normal_matrix)), run_t_test) + + return(pvals) +} + + + + +##### Below is deprecated.... use inferCNV_tumor_subclusters.random_smoothed_trees +## Random Trees + +.partition_by_random_trees <- function(tumor_name, tumor_expr_data, hclust_method, p_val) { + + grps <- rep(sprintf("%s.%d", tumor_name, 1), ncol(tumor_expr_data)) + names(grps) <- colnames(tumor_expr_data) + + grps <- .single_tumor_subclustering_recursive_random_trees(tumor_expr_data, hclust_method, p_val, grps) + + + return(grps) + +} + + +.single_tumor_subclustering_recursive_random_trees <- function(tumor_expr_data, hclust_method, p_val, grps.adj, min_cluster_size_recurse=10) { + + tumor_clade_name = unique(grps.adj[names(grps.adj) %in% colnames(tumor_expr_data)]) + message("unique tumor clade name: ", tumor_clade_name) + if (length(tumor_clade_name) > 1) { + stop("Error, found too many names in current clade") + } + + hc <- hclust(dist(t(tumor_expr_data)), method=hclust_method) + + rand_params_info = .parameterize_random_cluster_heights(tumor_expr_data, hclust_method) + + h_obs = rand_params_info$h_obs + h = h_obs$height + max_height = rand_params_info$max_h + + max_height_pval = 1 + if (max_height > 0) { + ## important... as some clades can be fully collapsed (all identical entries) with zero heights for all + e = rand_params_info$ecdf + max_height_pval = 1- e(max_height) + } + + #message(sprintf("Lengths(h): %s", paste(h, sep=",", collapse=","))) + #message(sprintf("max_height_pval: %g", max_height_pval)) + + if (max_height_pval <= p_val) { + ## keep on cutting. 
+ cut_height = mean(c(h[length(h)], h[length(h)-1])) + message(sprintf("cutting at height: %g", cut_height)) + grps = cutree(h_obs, h=cut_height) + print(grps) + uniqgrps = unique(grps) + + message("unique grps: ", paste0(uniqgrps, sep=",", collapse=",")) + for (grp in uniqgrps) { + grp_idx = which(grps==grp) + + message(sprintf("grp: %s contains idx: %s", grp, paste(grp_idx,sep=",", collapse=","))) + df = tumor_expr_data[,grp_idx,drop=FALSE] + ## define subset. + subset_cell_names = colnames(df) + + subset_clade_name = sprintf("%s.%d", tumor_clade_name, grp) + grps.adj[names(grps.adj) %in% subset_cell_names] <- subset_clade_name + + if (length(grp_idx) > min_cluster_size_recurse) { + ## recurse + grps.adj <- .single_tumor_subclustering_recursive_random_trees(tumor_expr_data=df, + hclust_method=hclust_method, + p_val=p_val, + grps.adj) + } else { + message("paritioned cluster size too small to recurse further") + } + } + } else { + message("No cluster pruning: ", tumor_clade_name) + } + + return(grps.adj) +} + + +.parameterize_random_cluster_heights <- function(expr_matrix, hclust_method, plot=TRUE) { + + ## inspired by: https://www.frontiersin.org/articles/10.3389/fgene.2016.00144/full + + t_tumor.expr.data = t(expr_matrix) # cells as rows, genes as cols + d = dist(t_tumor.expr.data) + + h_obs = hclust(d, method=hclust_method) + + + # permute by chromosomes + permute_col_vals <- function(df) { + + num_cells = nrow(df) + + for (i in seq(ncol(df) ) ) { + + df[, i] = df[sample(x=1:num_cells, size=num_cells, replace=FALSE), i] + } + + df + } + + h_rand_ex = NULL + max_rand_heights = c() + num_rand_iters=100 + for (i in 1:num_rand_iters) { + #message(sprintf("iter i:%d", i)) + rand.tumor.expr.data = permute_col_vals(t_tumor.expr.data) + + rand.dist = dist(rand.tumor.expr.data) + h_rand <- hclust(rand.dist, method=hclust_method) + h_rand_ex = h_rand + max_rand_heights = c(max_rand_heights, max(h_rand$height)) + } + + h = h_obs$height + + max_height = max(h) + + 
message(sprintf("Lengths for original tree branches (h): %s", paste(h, sep=",", collapse=","))) + message(sprintf("Max height: %g", max_height)) + + message(sprintf("Lengths for max heights: %s", paste(max_rand_heights, sep=",", collapse=","))) + + e = ecdf(max_rand_heights) + + pval = 1- e(max_height) + message(sprintf("pval: %g", pval)) + + params_list <- list(h_obs=h_obs, + max_h=max_height, + rand_max_height_dist=max_rand_heights, + ecdf=e, + h_rand_ex = h_rand_ex + ) + + if (plot) { + .plot_tree_height_dist(params_list) + } + + + return(params_list) + +} + + +.plot_tree_height_dist <- function(params_list, plot_title='tree_heights') { + + mf = par(mfrow=(c(3,1))) + + ## density plot + rand_height_density = density(params_list$rand_max_height_dist) + + xlim=range(params_list$max_h, rand_height_density$x) + ylim=range(rand_height_density$y) + plot(rand_height_density, xlim=xlim, ylim=ylim, main=paste(plot_title, "density")) + abline(v=params_list$max_h, col='red') + + + ## plot the clustering + h_obs = params_list$h_obs + h_obs$labels <- NULL #because they're too long to display + plot(h_obs) + + ## plot a random example: + h_rand_ex = params_list$h_rand_ex + h_rand_ex$labels <- NULL + plot(h_rand_ex) + + par(mf) + +} + +.get_tree_height_via_ecdf <- function(p_val, params_list) { + + h = quantile(params_list$ecdf, probs=1-p_val) + + return(h) +} + + diff --git a/R/inferCNV_tumor_subclusters.random_smoothed_trees.R b/R/inferCNV_tumor_subclusters.random_smoothed_trees.R new file mode 100644 index 00000000..7a30ee1b --- /dev/null +++ b/R/inferCNV_tumor_subclusters.random_smoothed_trees.R @@ -0,0 +1,339 @@ + + +define_signif_tumor_subclusters_via_random_smooothed_trees <- function(infercnv_obj, p_val, hclust_method, window_size=101, + max_recursion_depth=3, min_cluster_size_recurse=10) { + + ## the state of the infercnv object here should be: + ## log transformed + ## but *NOT* smoothed. 
+ ## TODO: -include check for smoothed property so will not run this if already smoothed. + + infercnv_copy = infercnv_obj ## don't want to change the original data .... just want to add subcluster info. + + flog.info(sprintf("define_signif_tumor_subclusters(p_val=%g", p_val)) + + infercnv_obj <- subtract_ref_expr_from_obs(infercnv_obj, inv_log=TRUE) # important, remove normal from tumor before testing clusters. + + ## must treat normals same way! + tumor_groups <- c(infercnv_obj@observation_grouped_cell_indices, infercnv_obj@reference_grouped_cell_indices) + + res = list() + + for (tumor_group in names(tumor_groups)) { + + flog.info(sprintf("define_signif_tumor_subclusters(), tumor: %s", tumor_group)) + + tumor_group_idx <- tumor_groups[[ tumor_group ]] + tumor_expr_data <- infercnv_obj@expr.data[,tumor_group_idx] + + tumor_subcluster_info <- .single_tumor_subclustering_smoothed_tree(tumor_group, tumor_group_idx, tumor_expr_data, p_val, hclust_method, window_size, + max_recursion_depth, min_cluster_size_recurse) + + res$hc[[tumor_group]] <- tumor_subcluster_info$hc + res$subclusters[[tumor_group]] <- tumor_subcluster_info$subclusters + + } + + infercnv_copy@tumor_subclusters <- res + + if (! 
is.null(infercnv_copy@.hspike)) { + flog.info("-mirroring for hspike") + + infercnv_copy@.hspike <- define_signif_tumor_subclusters_via_random_smooothed_trees(infercnv_copy@.hspike, p_val, hclust_method, + window_size, max_recursion_depth, min_cluster_size_recurse) + } + + + return(infercnv_copy) +} + + + +.single_tumor_subclustering_smoothed_tree <- function(tumor_name, tumor_group_idx, tumor_expr_data, p_val, hclust_method, window_size, + max_recursion_depth, min_cluster_size_recurse) { + + + tumor_subcluster_info = list() + + sm_tumor_expr_data = apply(tumor_expr_data, 2, caTools::runmean, k=window_size) + sm_tumor_expr_data = scale(sm_tumor_expr_data, center=TRUE, scale=FALSE) + + hc <- hclust(dist(t(sm_tumor_expr_data)), method=hclust_method) + + tumor_subcluster_info$hc = hc + + heights = hc$height + + grps <- .partition_by_random_smoothed_trees(tumor_name, tumor_expr_data, hclust_method, p_val, window_size, + max_recursion_depth, min_cluster_size_recurse) + + + #cluster_ids = unique(grps) + #flog.info(sprintf("cut tree into: %g groups", length(cluster_ids))) + + tumor_subcluster_info$subclusters = list() + + ordered_idx = tumor_group_idx[hc$order] + s = split(grps,grps) + + flog.info(sprintf("cut tree into: %g groups", length(s))) + + start_idx = 1 + for (g in names(s)) { + #for (g in cluster_ids) { + split_subcluster = paste0(tumor_name, "_s", g) + flog.info(sprintf("-processing %s,%s", tumor_name, split_subcluster)) + + # subcluster_indices = tumor_group_idx[which(grps == g)] + # subcluster_indices = ordered_idx[which(grps == g)] + end_idx = start_idx + length(s[[g]]) - 1 + subcluster_indices = tumor_group_idx[hc$order[start_idx:end_idx]] + start_idx = end_idx + 1 + + tumor_subcluster_info$subclusters[[ split_subcluster ]] = subcluster_indices + + } + + return(tumor_subcluster_info) +} + + +## Random Trees + +.partition_by_random_smoothed_trees <- function(tumor_name, tumor_expr_data, hclust_method, p_val, window_size, + max_recursion_depth, 
min_cluster_size_recurse) { + + grps <- rep(sprintf("%s.%d", tumor_name, 1), ncol(tumor_expr_data)) + names(grps) <- colnames(tumor_expr_data) + + grps <- .single_tumor_subclustering_recursive_random_smoothed_trees(tumor_expr_data, hclust_method, p_val, grps, window_size, + max_recursion_depth, min_cluster_size_recurse) + + + return(grps) + +} + + +.single_tumor_subclustering_recursive_random_smoothed_trees <- function(tumor_expr_data, hclust_method, p_val, grps.adj, window_size, + max_recursion_depth, + min_cluster_size_recurse, + recursion_depth=1) { + + + if (recursion_depth > max_recursion_depth) { + flog.warn("-not exceeding max recursion depth.") + return(grps.adj) + } + + tumor_clade_name = unique(grps.adj[names(grps.adj) %in% colnames(tumor_expr_data)]) + message("unique tumor clade name: ", tumor_clade_name) + if (length(tumor_clade_name) > 1) { + stop("Error, found too many names in current clade") + } + + rand_params_info = .parameterize_random_cluster_heights_smoothed_trees(tumor_expr_data, hclust_method, window_size) + + h_obs = rand_params_info$h_obs + h = h_obs$height + max_height = rand_params_info$max_h + + max_height_pval = 1 + if (max_height > 0) { + ## important... as some clades can be fully collapsed (all identical entries) with zero heights for all + e = rand_params_info$ecdf + max_height_pval = 1- e(max_height) + } + + #message(sprintf("Lengths(h): %s", paste(h, sep=",", collapse=","))) + #message(sprintf("max_height_pval: %g", max_height_pval)) + + if (max_height_pval <= p_val) { + ## keep on cutting. + cut_height = mean(c(h[length(h)], h[length(h)-1])) + flog.info(sprintf("cutting at height: %g", cut_height)) + grps = cutree(h_obs, h=cut_height) + print(grps) + uniqgrps = unique(grps) + + message("unique grps: ", paste0(uniqgrps, sep=",", collapse=",")) + + if (all(sapply(uniqgrps, function(grp) { + (sum(grps==grp) < min_cluster_size_recurse) + } ))) { + flog.warn("none of the split subclusters exceed min cluster size. 
Not recursing here.") + return(grps.adj) + } + + for (grp in uniqgrps) { + grp_idx = which(grps==grp) + + message(sprintf("grp: %s contains idx: %s", grp, paste(grp_idx,sep=",", collapse=","))) + df = tumor_expr_data[,grp_idx,drop=FALSE] + ## define subset. + subset_cell_names = colnames(df) + + subset_clade_name = sprintf("%s.%d", tumor_clade_name, grp) + grps.adj[names(grps.adj) %in% subset_cell_names] <- subset_clade_name + + + if (length(grp_idx) >= min_cluster_size_recurse) { + ## recurse + grps.adj <- .single_tumor_subclustering_recursive_random_smoothed_trees(tumor_expr_data=df, + hclust_method=hclust_method, + p_val=p_val, + grps.adj=grps.adj, + window_size=window_size, + max_recursion_depth=max_recursion_depth, + min_cluster_size_recurse=min_cluster_size_recurse, + recursion_depth = recursion_depth + 1 ) + } else { + flog.warn(sprintf("%s size of %d is too small to recurse on", subset_clade_name, length(grp_idx))) + } + + } + + } else { + message("No cluster pruning: ", tumor_clade_name) + } + + return(grps.adj) +} + + +.parameterize_random_cluster_heights_smoothed_trees <- function(expr_matrix, hclust_method, window_size, plot=FALSE) { + + ## inspired by: https://www.frontiersin.org/articles/10.3389/fgene.2016.00144/full + + sm_expr_data = apply(expr_matrix, 2, caTools::runmean, k=window_size) + sm_expr_data = scale(sm_expr_data, center=TRUE, scale=FALSE) + + d = dist(t(sm_expr_data)) + + h_obs = hclust(d, method=hclust_method) + + + # permute by chromosomes + permute_col_vals <- function(df) { + ## cells as rows, features as columns + + num_cells = nrow(df) + + for (i in seq(ncol(df) ) ) { + + df[, i] = df[sample(x=1:num_cells, size=num_cells, replace=FALSE), i] + } + + df + } + + + flog.info(sprintf("random trees, using %g parallel threads", infercnv.env$GLOBAL_NUM_THREADS)) + if (infercnv.env$GLOBAL_NUM_THREADS > future::availableCores()) { + flog.warn(sprintf("not enough cores available, setting to num avail cores: %g", future::availableCores())) + 
infercnv.env$GLOBAL_NUM_THREADS <- future::availableCores() + } + + # library(doParallel) + registerDoParallel(cores=infercnv.env$GLOBAL_NUM_THREADS) + num_rand_iters=100 + max_rand_heights <- foreach (i=1:num_rand_iters) %dopar% { + #message("rand iteration: ", i) + + rand.tumor.expr.data = t(permute_col_vals( t(expr_matrix) )) + + ## smooth it and re-center: + sm.rand.tumor.expr.data = apply(rand.tumor.expr.data, 2, caTools::runmean, k=window_size) + sm.rand.tumor.expr.data = scale(sm.rand.tumor.expr.data, center=TRUE, scale=FALSE) + + rand.dist = dist(t(sm.rand.tumor.expr.data)) + h_rand <- hclust(rand.dist, method=hclust_method) + max_rand_height <- max(h_rand$height) + + max_rand_height + } + + max_rand_heights <- as.numeric(max_rand_heights) + + h = h_obs$height + + max_height = max(h) + + message(sprintf("Lengths for original tree branches (h): %s", paste(h, sep=",", collapse=","))) + message(sprintf("Max height: %g", max_height)) + + message(sprintf("Lengths for max heights: %s", paste(max_rand_heights, sep=",", collapse=","))) + + e = ecdf(max_rand_heights) + + pval = 1- e(max_height) + message(sprintf("pval: %g", pval)) + + params_list <- list(h_obs=h_obs, + max_h=max_height, + rand_max_height_dist=max_rand_heights, + ecdf=e + ) + + if (plot) { + .plot_tree_height_dist(params_list) + } + + + return(params_list) + +} + + +.plot_tree_height_dist <- function(params_list, plot_title='tree_heights') { + + mf = par(mfrow=(c(2,1))) + + ## density plot + rand_height_density = density(params_list$rand_max_height_dist) + + xlim=range(params_list$max_h, rand_height_density$x) + ylim=range(rand_height_density$y) + plot(rand_height_density, xlim=xlim, ylim=ylim, main=paste(plot_title, "density")) + abline(v=params_list$max_h, col='red') + + + ## plot the clustering + h_obs = params_list$h_obs + h_obs$labels <- NULL #because they're too long to display + plot(h_obs) + + par(mf) + +} + +.get_tree_height_via_ecdf <- function(p_val, params_list) { + + h = 
quantile(params_list$ecdf, probs=1-p_val) + + return(h) +} + + + +find_DE_stat_significance <- function(normal_matrix, tumor_matrix) { + + run_t_test<- function(idx) { + vals1 = unlist(normal_matrix[idx,,drop=TRUE]) + vals2 = unlist(tumor_matrix[idx,,drop=TRUE]) + + ## useful way of handling tests that may fail: + ## https://stat.ethz.ch/pipermail/r-help/2008-February/154167.html + + res = try(t.test(vals1, vals2), silent=TRUE) + + if (is(res, "try-error")) return(NA) else return(res$p.value) + + } + + pvals = sapply(seq(nrow(normal_matrix)), run_t_test) + + return(pvals) +} + + diff --git a/R/noise_reduction.R b/R/noise_reduction.R new file mode 100644 index 00000000..97d25680 --- /dev/null +++ b/R/noise_reduction.R @@ -0,0 +1,90 @@ + +#' @title apply_median_filtering +#' +#' @description Apply a median filtering to the expression matrix within each tumor bounds +#' +#' @param infercnv_obj infercnv_object +#' +#' @param window_size Size of the window side centered on the data point to filter (default = 7). +#' +#' @param on_observations boolean (default=TRUE), run on observations data (tumor cells). +#' +#' @param on_references boolean (default=TRUE), run on references (normal cells). +#' +#' @return infercnv_obj with median filtering applied to observations +#' +#' @export +#' + +apply_median_filtering <- function(infercnv_obj, + window_size=7, + on_observations=TRUE, + on_references=TRUE) { + + if (window_size%%2 != 1 | window_size < 2) { + flog.error("::apply_median_filtering: Error, window_size is an even or < 2. 
Please specify an odd number >= 3.") + } + + half_window = (window_size - 1) / 2 + + gene_chr_listing = infercnv_obj@gene_order[[C_CHR]] + chrs = unlist(unique(gene_chr_listing)) + + if (on_observations) { + for (tumor_type in names(infercnv_obj@observation_grouped_cell_indices)) { + + tumor_indices_list = infercnv_obj@tumor_subclusters[["subclusters"]][[ tumor_type ]] + + for (tumor_indices in tumor_indices_list) { + for (chr in chrs) { + chr_genes_indices = which(gene_chr_listing == chr) + working_data = infercnv_obj@expr.data[chr_genes_indices, tumor_indices, drop=FALSE] + + infercnv_obj@expr.data[chr_genes_indices, tumor_indices] = .median_filter(data=working_data, + window_size=window_size, + half_window=half_window) + } + } + } + } + + if (on_references) { + for (ref_indices in infercnv_obj@reference_grouped_cell_indices) { + for (chr in chrs) { + chr_genes_indices = which(gene_chr_listing == chr) + working_data = infercnv_obj@expr.data[chr_genes_indices, ref_indices, drop=FALSE] + + infercnv_obj@expr.data[chr_genes_indices, ref_indices] = .median_filter(data=working_data, + window_size=window_size, + half_window=half_window) + } + } + } + + return(infercnv_obj) +} + + +.median_filter <- function(data, + window_size, + half_window) { + + xdim = dim(data)[1] + ydim = dim(data)[2] + results = data + + # if (xdim >= window_size & ydim >= window_size) { + for (posx in 1:xdim) { + posxa <- ifelse(posx <= (half_window + 1), 1, (posx - (half_window + 1))) + posxb <- ifelse(posx >= (xdim - (half_window + 1)), xdim, (posx + (half_window + 1))) + for ( posy in 1:ydim) { + posya <- ifelse(posy <= (half_window + 1), 1, (posy - (half_window + 1))) + posyb <- ifelse(posy >= (ydim - (half_window + 1)), ydim, (posy + (half_window + 1))) + results[posx, posy] = median(data[posxa:posxb, posya:posyb]) + } + } + #} + + return(results) +} + diff --git a/__simulations/data/normal.counts.matrix.REMOVED.git-id b/__simulations/data/normal.counts.matrix.REMOVED.git-id deleted file 
mode 100644 index 079baf45..00000000 --- a/__simulations/data/normal.counts.matrix.REMOVED.git-id +++ /dev/null @@ -1 +0,0 @@ -2a7fd3b5cd3db2ca03a5d905787052d2ef10880f \ No newline at end of file diff --git a/__simulations/infercnv_on_simulated.Rmd b/__simulations/infercnv_on_simulated.Rmd deleted file mode 100644 index 69605bbf..00000000 --- a/__simulations/infercnv_on_simulated.Rmd +++ /dev/null @@ -1,254 +0,0 @@ ---- -title: "infercnv_run" -author: "Brian Haas" -date: "9/5/2018" -output: - html_document: default - pdf_document: default - --- - -```{r setup, include=FALSE} -knitr::opts_chunk$set(echo = TRUE) -library(infercnv) -``` - -## Create the InferCNV Object - -```{r} -infercnv_obj = CreateInfercnvObject( - raw_counts_matrix="sim.data", - annotations_file="sim.sample.annots.txt", - delim="\t", - gene_order_file="../example/full_dataset/gencode_v19_gene_pos.txt", - ref_group_names=c("normal") ) - -``` - - - -## Filtering genes - -Removing those genes that are very lowly expressed or present in very few cells - - -```{r} -# filter out low expressed genes -cutoff=1 -infercnv_obj <- require_above_min_mean_expr_cutoff(infercnv_obj, cutoff) - -# filter out bad cells -min_cells_per_gene=3 -infercnv_obj <- require_above_min_cells_ref(infercnv_obj, min_cells_per_gene=min_cells_per_gene) - -## for safe keeping -infercnv_orig_filtered = infercnv_obj -#plot_mean_chr_expr_lineplot(infercnv_obj) -save('infercnv_obj', file = 'infercnv_obj.orig_filtered') - -``` - - -## Normalize each cell's counts for sequencing depth - -```{r} -infercnv_obj <- infercnv:::normalize_counts_by_seq_depth(infercnv_obj) -``` - - -## perform Anscombe normalization - -Suggested by Matan for removing noisy variation at low counts - -```{r} -infercnv_obj <- infercnv:::anscombe_transform(infercnv_obj) -save('infercnv_obj', file='infercnv_obj.anscombe') -``` - - -## log transform the normalized counts: - -```{r} -infercnv_obj <- log2xplus1(infercnv_obj) -save('infercnv_obj', 
file='infercnv_obj.log_transformed') -``` - -## Apply maximum bounds to the expression data to reduce outlier effects -```{r} -threshold = mean(abs(get_average_bounds(infercnv_obj))) -infercnv_obj <- apply_max_threshold_bounds(infercnv_obj, threshold=threshold) -``` - -## Initial view, before inferCNV operations: - -```{r, results="hide"} -plot_cnv(infercnv_obj, - output_filename='infercnv.logtransf', - x.range="auto", - title = "Before InferCNV (filtered & log2 transformed)", - color_safe_pal = FALSE, - x.center = mean(infercnv_obj@expr.data)) -``` - - -```{r} -knitr::include_graphics("infercnv.logtransf.png") -``` - - - -## perform smoothing across chromosomes - -```{r} -infercnv_obj = smooth_by_chromosome(infercnv_obj, window_length=101, smooth_ends=TRUE) -save('infercnv_obj', file='infercnv_obj.smooth_by_chr') - -# re-center each cell -infercnv_obj <- center_cell_expr_across_chromosome(infercnv_obj, method = "median") -save('infercnv_obj', file='infercnv_obj.cells_recentered') - -``` - - -```{r, results='hide' } -plot_cnv(infercnv_obj, - output_filename='infercnv.chr_smoothed', - x.range="auto", - title = "chr smoothed and cells re-centered", - color_safe_pal = FALSE) -``` - - -```{r} -knitr::include_graphics("infercnv.chr_smoothed.png") -``` - - - -# subtract the reference values from observations, now have log(fold change) values - -```{r} -infercnv_obj <- subtract_ref_expr_from_obs(infercnv_obj, inv_log=TRUE) -save('infercnv_obj', file='infercnv_obj.ref_subtracted') -``` - -```{r, results="hide"} -plot_cnv(infercnv_obj, - output_filename='infercnv.ref_subtracted', - x.range="auto", - title="ref subtracted", - color_safe_pal = FALSE) -``` - -```{r} -knitr::include_graphics("infercnv.ref_subtracted.png") -``` - - -## invert log values - -Converting the log(FC) values to regular fold change values, centered at 1 (no fold change) - -This is important because we want (1/2)x to be symmetrical to 1.5x, representing loss/gain of one chromosome region. 
- -```{r} - -infercnv_obj <- invert_log2(infercnv_obj) -save('infercnv_obj', file='infercnv_obj.inverted_log') -``` - - -```{r, results="hide"} -plot_cnv(infercnv_obj, - output_filename='infercnv.inverted', - color_safe_pal = FALSE, - x.range="auto", - x.center=1, - title = "inverted log FC to FC") -``` - - -```{r} -knitr::include_graphics("infercnv.inverted.png") -``` - - -## Removing noise - -```{r} -infercnv_obj <- clear_noise_via_ref_mean_sd(infercnv_obj, sd_amplifier = 1) -save('infercnv_obj', file='infercnv_obj.denoised') -``` - - -```{r, results="hide"} -plot_cnv(infercnv_obj, - output_filename='infercnv.denoised', - x.range="auto", - x.center=1, - title="denoised", - color_safe_pal = FALSE) -``` - -```{r} -knitr::include_graphics("infercnv.denoised.png") -``` - -## Remove outlier data points - -This generally improves on the visualization - -```{r} -infercnv_obj = remove_outliers_norm(infercnv_obj) -save('infercnv_obj', file="infercnv_obj.outliers_removed") -``` - - -```{r, results="hide"} -plot_cnv(infercnv_obj, - output_filename='infercnv.outliers_removed', - color_safe_pal = FALSE, - x.range="auto", - x.center=1, - title = "outliers removed") -``` - - -```{r} -knitr::include_graphics("infercnv.outliers_removed.png") -``` - - - -## Find DE genes by comparing the mutant types to normal types, BASIC - -Runs a t-Test comparing tumor/normal for each patient and normal sample, and masks out those genes that are not significantly DE. 
- -```{r} - -load('infercnv_obj.final') -plot_data = infercnv_obj@expr.data -high_threshold = max(abs(quantile(plot_data[plot_data != 0], c(0.05, 0.95)))) - -low_threshold = -1 * high_threshold - -infercnv_obj2 <- infercnv:::mask_non_DE_genes_basic(infercnv_obj, test.use = 't', center_val=1) - -``` - - -```{r, results="hide"} -plot_cnv(infercnv_obj2, - output_filename='infercnv.non-DE-genes-masked', - color_safe_pal = FALSE, - x.range=c(low_threshold, high_threshold), - x.center=1, - title = "non-DE-genes-masked") -``` - - -```{r} -knitr::include_graphics("infercnv.non-DE-genes-masked.png") - -``` - - diff --git a/__simulations/infercnv_on_simulated.html.REMOVED.git-id b/__simulations/infercnv_on_simulated.html.REMOVED.git-id deleted file mode 100644 index 5a04a411..00000000 --- a/__simulations/infercnv_on_simulated.html.REMOVED.git-id +++ /dev/null @@ -1 +0,0 @@ -20dda7f7a23503ef183e8e0556078095d15da8ff \ No newline at end of file diff --git a/__simulations/make_simulated_CNV.R b/__simulations/make_simulated_CNV.R deleted file mode 100755 index 7fc126aa..00000000 --- a/__simulations/make_simulated_CNV.R +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env Rscript - - -filename="data/normal.counts.matrix" - -data = read.table(filename, header=T, row.names=1) - - -ordered_genes = read.table("../example/full_dataset/gencode_v19_gene_pos.txt") -ordered_gene_names = ordered_genes[,1] -data = data[rownames(data) %in% ordered_gene_names,] -ordered_gene_names = ordered_gene_names[ordered_gene_names %in% rownames(data)] -ordering = match(ordered_gene_names, rownames(data)) -data = data[ordering,] - - - -mutate_gene_expr = function(data, genes_to_mutate, multiplier, colname_prefix) { - - data_m = data - data_m[genes_to_mutate,] = data_m[genes_to_mutate,] * multiplier - colnames(data_m) = paste(colname_prefix, colnames(data_m), sep="") - - return(data_m) - -} - -sample_annots = data.frame(patient=colnames(data), type="normal") - -# lower ranges - -genes_to_mutate = 1000:3000 
-data_pt25 = mutate_gene_expr(data, genes_to_mutate, 0.25, "A_") -sample_annots = rbind(sample_annots, data.frame(patient=colnames(data_pt25), type="Apt25")) - - -genes_to_mutate = 5000:7000 -data_pt5 = mutate_gene_expr(data, genes_to_mutate, 0.5, "B_") -sample_annots = rbind(sample_annots, data.frame(patient=colnames(data_pt5), type="Bpt5")) - - -# upper ranges -genes_to_mutate = 9000:11000 -data_1pt5 = mutate_gene_expr(data, genes_to_mutate, 1.5, "C_") -sample_annots = rbind(sample_annots, data.frame(patient=colnames(data_1pt5), type="C1pt5")) - -genes_to_mutate = 13000:15000 -data_1pt75 = mutate_gene_expr(data, genes_to_mutate, 1.75, "D_") -sample_annots = rbind(sample_annots, data.frame(patient=colnames(data_1pt75), type="D1pt75")) - - - -## make new data -new_data = cbind(data, data_pt25, data_pt5, data_1pt5, data_1pt75) - -sample_annots = cbind(sample_annots, t2=sample_annots$type) - -# write outputs - -write.table(new_data, "sim.data", quote=F, sep="\t") - -write.table(sample_annots, "sim.sample.annots.txt", quote=F, sep="\t", row.names=FALSE, col.names=FALSE) - diff --git a/__simulations/sim.data.REMOVED.git-id b/__simulations/sim.data.REMOVED.git-id deleted file mode 100644 index 315720f5..00000000 --- a/__simulations/sim.data.REMOVED.git-id +++ /dev/null @@ -1 +0,0 @@ -f1745589a7957da20772a8a09d4718fadc1b97b6 \ No newline at end of file diff --git a/__simulations/sim.sample.annots.txt b/__simulations/sim.sample.annots.txt deleted file mode 100644 index 4ea45337..00000000 --- a/__simulations/sim.sample.annots.txt +++ /dev/null @@ -1,225 +0,0 @@ -MGH54_P16_F12 normal normal -MGH53_P5_C12 normal normal -MGH54_P12_C10 normal normal -MGH54_P16_F02 normal normal -MGH54_P11_C11 normal normal -MGH53_P1_E03 normal normal -MGH54_P15_D06 normal normal -MGH54_P14_F05 normal normal -MGH54_P16_A03 normal normal -MGH53_P4_G11 normal normal -MGH53_P7_B09 normal normal -MGH54_P17_D11 normal normal -MGH54_P10_G04 normal normal -MGH54_P10_C06 normal normal -MGH53_P2_A02 
normal normal -MGH53_P4_F06 normal normal -MGH53_P7_F07 normal normal -MGH53_P11_F06 normal normal -MGH53_P5_G02 normal normal -MGH53_P1_F09 normal normal -MGH53_P11_H03 normal normal -MGH53_P10_D11 normal normal -MGH53_P1_A10 normal normal -MGH53_P4_B12 normal normal -MGH53_P5_H09 normal normal -MGH53_P9_D07 normal normal -MGH53_P11_E03 normal normal -MGH53_P1_C02 normal normal -MGH53_P10_F11 normal normal -MGH53_P1_E02 normal normal -MGH53_P1_D07 normal normal -MGH53_P6_B01 normal normal -MGH53_P2_G04 normal normal -MGH53_P1_C05 normal normal -MGH53_P2_G09 normal normal -MGH53_P5_A05 normal normal -MGH53_P5_F04 normal normal -MGH53_P10_H03 normal normal -MGH53_P11_F08 normal normal -MGH54_P7_F12 normal normal -MGH53_P8_F03 normal normal -MGH53_P11_A07 normal normal -MGH53_P6_B11 normal normal -MGH53_P4_C07 normal normal -MGH53_P6_H06 normal normal -A_MGH54_P16_F12 Apt25 Apt25 -A_MGH53_P5_C12 Apt25 Apt25 -A_MGH54_P12_C10 Apt25 Apt25 -A_MGH54_P16_F02 Apt25 Apt25 -A_MGH54_P11_C11 Apt25 Apt25 -A_MGH53_P1_E03 Apt25 Apt25 -A_MGH54_P15_D06 Apt25 Apt25 -A_MGH54_P14_F05 Apt25 Apt25 -A_MGH54_P16_A03 Apt25 Apt25 -A_MGH53_P4_G11 Apt25 Apt25 -A_MGH53_P7_B09 Apt25 Apt25 -A_MGH54_P17_D11 Apt25 Apt25 -A_MGH54_P10_G04 Apt25 Apt25 -A_MGH54_P10_C06 Apt25 Apt25 -A_MGH53_P2_A02 Apt25 Apt25 -A_MGH53_P4_F06 Apt25 Apt25 -A_MGH53_P7_F07 Apt25 Apt25 -A_MGH53_P11_F06 Apt25 Apt25 -A_MGH53_P5_G02 Apt25 Apt25 -A_MGH53_P1_F09 Apt25 Apt25 -A_MGH53_P11_H03 Apt25 Apt25 -A_MGH53_P10_D11 Apt25 Apt25 -A_MGH53_P1_A10 Apt25 Apt25 -A_MGH53_P4_B12 Apt25 Apt25 -A_MGH53_P5_H09 Apt25 Apt25 -A_MGH53_P9_D07 Apt25 Apt25 -A_MGH53_P11_E03 Apt25 Apt25 -A_MGH53_P1_C02 Apt25 Apt25 -A_MGH53_P10_F11 Apt25 Apt25 -A_MGH53_P1_E02 Apt25 Apt25 -A_MGH53_P1_D07 Apt25 Apt25 -A_MGH53_P6_B01 Apt25 Apt25 -A_MGH53_P2_G04 Apt25 Apt25 -A_MGH53_P1_C05 Apt25 Apt25 -A_MGH53_P2_G09 Apt25 Apt25 -A_MGH53_P5_A05 Apt25 Apt25 -A_MGH53_P5_F04 Apt25 Apt25 -A_MGH53_P10_H03 Apt25 Apt25 -A_MGH53_P11_F08 Apt25 Apt25 -A_MGH54_P7_F12 Apt25 Apt25 
-A_MGH53_P8_F03 Apt25 Apt25 -A_MGH53_P11_A07 Apt25 Apt25 -A_MGH53_P6_B11 Apt25 Apt25 -A_MGH53_P4_C07 Apt25 Apt25 -A_MGH53_P6_H06 Apt25 Apt25 -B_MGH54_P16_F12 Bpt5 Bpt5 -B_MGH53_P5_C12 Bpt5 Bpt5 -B_MGH54_P12_C10 Bpt5 Bpt5 -B_MGH54_P16_F02 Bpt5 Bpt5 -B_MGH54_P11_C11 Bpt5 Bpt5 -B_MGH53_P1_E03 Bpt5 Bpt5 -B_MGH54_P15_D06 Bpt5 Bpt5 -B_MGH54_P14_F05 Bpt5 Bpt5 -B_MGH54_P16_A03 Bpt5 Bpt5 -B_MGH53_P4_G11 Bpt5 Bpt5 -B_MGH53_P7_B09 Bpt5 Bpt5 -B_MGH54_P17_D11 Bpt5 Bpt5 -B_MGH54_P10_G04 Bpt5 Bpt5 -B_MGH54_P10_C06 Bpt5 Bpt5 -B_MGH53_P2_A02 Bpt5 Bpt5 -B_MGH53_P4_F06 Bpt5 Bpt5 -B_MGH53_P7_F07 Bpt5 Bpt5 -B_MGH53_P11_F06 Bpt5 Bpt5 -B_MGH53_P5_G02 Bpt5 Bpt5 -B_MGH53_P1_F09 Bpt5 Bpt5 -B_MGH53_P11_H03 Bpt5 Bpt5 -B_MGH53_P10_D11 Bpt5 Bpt5 -B_MGH53_P1_A10 Bpt5 Bpt5 -B_MGH53_P4_B12 Bpt5 Bpt5 -B_MGH53_P5_H09 Bpt5 Bpt5 -B_MGH53_P9_D07 Bpt5 Bpt5 -B_MGH53_P11_E03 Bpt5 Bpt5 -B_MGH53_P1_C02 Bpt5 Bpt5 -B_MGH53_P10_F11 Bpt5 Bpt5 -B_MGH53_P1_E02 Bpt5 Bpt5 -B_MGH53_P1_D07 Bpt5 Bpt5 -B_MGH53_P6_B01 Bpt5 Bpt5 -B_MGH53_P2_G04 Bpt5 Bpt5 -B_MGH53_P1_C05 Bpt5 Bpt5 -B_MGH53_P2_G09 Bpt5 Bpt5 -B_MGH53_P5_A05 Bpt5 Bpt5 -B_MGH53_P5_F04 Bpt5 Bpt5 -B_MGH53_P10_H03 Bpt5 Bpt5 -B_MGH53_P11_F08 Bpt5 Bpt5 -B_MGH54_P7_F12 Bpt5 Bpt5 -B_MGH53_P8_F03 Bpt5 Bpt5 -B_MGH53_P11_A07 Bpt5 Bpt5 -B_MGH53_P6_B11 Bpt5 Bpt5 -B_MGH53_P4_C07 Bpt5 Bpt5 -B_MGH53_P6_H06 Bpt5 Bpt5 -C_MGH54_P16_F12 C1pt5 C1pt5 -C_MGH53_P5_C12 C1pt5 C1pt5 -C_MGH54_P12_C10 C1pt5 C1pt5 -C_MGH54_P16_F02 C1pt5 C1pt5 -C_MGH54_P11_C11 C1pt5 C1pt5 -C_MGH53_P1_E03 C1pt5 C1pt5 -C_MGH54_P15_D06 C1pt5 C1pt5 -C_MGH54_P14_F05 C1pt5 C1pt5 -C_MGH54_P16_A03 C1pt5 C1pt5 -C_MGH53_P4_G11 C1pt5 C1pt5 -C_MGH53_P7_B09 C1pt5 C1pt5 -C_MGH54_P17_D11 C1pt5 C1pt5 -C_MGH54_P10_G04 C1pt5 C1pt5 -C_MGH54_P10_C06 C1pt5 C1pt5 -C_MGH53_P2_A02 C1pt5 C1pt5 -C_MGH53_P4_F06 C1pt5 C1pt5 -C_MGH53_P7_F07 C1pt5 C1pt5 -C_MGH53_P11_F06 C1pt5 C1pt5 -C_MGH53_P5_G02 C1pt5 C1pt5 -C_MGH53_P1_F09 C1pt5 C1pt5 -C_MGH53_P11_H03 C1pt5 C1pt5 -C_MGH53_P10_D11 C1pt5 C1pt5 -C_MGH53_P1_A10 C1pt5 C1pt5 
-C_MGH53_P4_B12 C1pt5 C1pt5 -C_MGH53_P5_H09 C1pt5 C1pt5 -C_MGH53_P9_D07 C1pt5 C1pt5 -C_MGH53_P11_E03 C1pt5 C1pt5 -C_MGH53_P1_C02 C1pt5 C1pt5 -C_MGH53_P10_F11 C1pt5 C1pt5 -C_MGH53_P1_E02 C1pt5 C1pt5 -C_MGH53_P1_D07 C1pt5 C1pt5 -C_MGH53_P6_B01 C1pt5 C1pt5 -C_MGH53_P2_G04 C1pt5 C1pt5 -C_MGH53_P1_C05 C1pt5 C1pt5 -C_MGH53_P2_G09 C1pt5 C1pt5 -C_MGH53_P5_A05 C1pt5 C1pt5 -C_MGH53_P5_F04 C1pt5 C1pt5 -C_MGH53_P10_H03 C1pt5 C1pt5 -C_MGH53_P11_F08 C1pt5 C1pt5 -C_MGH54_P7_F12 C1pt5 C1pt5 -C_MGH53_P8_F03 C1pt5 C1pt5 -C_MGH53_P11_A07 C1pt5 C1pt5 -C_MGH53_P6_B11 C1pt5 C1pt5 -C_MGH53_P4_C07 C1pt5 C1pt5 -C_MGH53_P6_H06 C1pt5 C1pt5 -D_MGH54_P16_F12 D1pt75 D1pt75 -D_MGH53_P5_C12 D1pt75 D1pt75 -D_MGH54_P12_C10 D1pt75 D1pt75 -D_MGH54_P16_F02 D1pt75 D1pt75 -D_MGH54_P11_C11 D1pt75 D1pt75 -D_MGH53_P1_E03 D1pt75 D1pt75 -D_MGH54_P15_D06 D1pt75 D1pt75 -D_MGH54_P14_F05 D1pt75 D1pt75 -D_MGH54_P16_A03 D1pt75 D1pt75 -D_MGH53_P4_G11 D1pt75 D1pt75 -D_MGH53_P7_B09 D1pt75 D1pt75 -D_MGH54_P17_D11 D1pt75 D1pt75 -D_MGH54_P10_G04 D1pt75 D1pt75 -D_MGH54_P10_C06 D1pt75 D1pt75 -D_MGH53_P2_A02 D1pt75 D1pt75 -D_MGH53_P4_F06 D1pt75 D1pt75 -D_MGH53_P7_F07 D1pt75 D1pt75 -D_MGH53_P11_F06 D1pt75 D1pt75 -D_MGH53_P5_G02 D1pt75 D1pt75 -D_MGH53_P1_F09 D1pt75 D1pt75 -D_MGH53_P11_H03 D1pt75 D1pt75 -D_MGH53_P10_D11 D1pt75 D1pt75 -D_MGH53_P1_A10 D1pt75 D1pt75 -D_MGH53_P4_B12 D1pt75 D1pt75 -D_MGH53_P5_H09 D1pt75 D1pt75 -D_MGH53_P9_D07 D1pt75 D1pt75 -D_MGH53_P11_E03 D1pt75 D1pt75 -D_MGH53_P1_C02 D1pt75 D1pt75 -D_MGH53_P10_F11 D1pt75 D1pt75 -D_MGH53_P1_E02 D1pt75 D1pt75 -D_MGH53_P1_D07 D1pt75 D1pt75 -D_MGH53_P6_B01 D1pt75 D1pt75 -D_MGH53_P2_G04 D1pt75 D1pt75 -D_MGH53_P1_C05 D1pt75 D1pt75 -D_MGH53_P2_G09 D1pt75 D1pt75 -D_MGH53_P5_A05 D1pt75 D1pt75 -D_MGH53_P5_F04 D1pt75 D1pt75 -D_MGH53_P10_H03 D1pt75 D1pt75 -D_MGH53_P11_F08 D1pt75 D1pt75 -D_MGH54_P7_F12 D1pt75 D1pt75 -D_MGH53_P8_F03 D1pt75 D1pt75 -D_MGH53_P11_A07 D1pt75 D1pt75 -D_MGH53_P6_B11 D1pt75 D1pt75 -D_MGH53_P4_C07 D1pt75 D1pt75 -D_MGH53_P6_H06 D1pt75 D1pt75 diff --git 
a/example/Makefile b/example/Makefile index 87463e43..07196af9 100644 --- a/example/Makefile +++ b/example/Makefile @@ -1,26 +1,27 @@ -all: logexpr logexpr_steps zscores zscores_steps +all: + ./run.R -logexpr: - ./__test_scripts/run.logexpr.R - touch logexpr +clean: + rm -rf ./output_dir -logexpr_steps: - ./__test_scripts/run.logexpr.R.steps - touch logexpr_steps -zscores: - ./__test_scripts/run.zscores.R - touch zscores -zscores_steps: - ./__test_scripts/run.zscores.R.steps - touch zscores_steps +debug: + ../scripts/plot_hspike.by_num_cells.R --infercnv_obj output_dir/run.final.infercnv_obj + ../scripts/plot_hspike.R --infercnv_obj output_dir/run.final.infercnv_obj + ../scripts/run_HMM_on_hspike.R --infercnv_obj output_dir/run.final.infercnv_obj + ../scripts/plot_hspike_vs_sample_chrs.R --infercnv_obj output_dir/preliminary.infercnv_obj -clean: - rm -f logexpr logexpr_steps zscores zscores_steps - rm -rf ./__test_outdir.* +i3: + cat run.R | sed s/HMM=TRUE/HMM=TRUE,HMM_type=\'i3\'/ > run.i3.R + Rscript ./run.i3.R + +Bayes: + cat run.R | sed s/HMM=TRUE/HMM=TRUE,BayesMaxPNormal=0.35/ > run.Bayes.R + Rscript ./run.Bayes.R + diff --git a/example/README.txt b/example/README.txt new file mode 100644 index 00000000..778e808e --- /dev/null +++ b/example/README.txt @@ -0,0 +1,5 @@ +This example uses an abridged version of the gencode annotations. You do not want to use that file with your own data. It's abridged here only to reduce space in R packaging. 
+ +The complete gencode annotation file can be found here: +https://github.com/broadinstitute/inferCNV_examples/tree/master/__gene_position_data + diff --git a/example/gencode_downsampled.txt b/example/gencode_downsampled.EXAMPLE_ONLY_DONT_REUSE.txt similarity index 100% rename from example/gencode_downsampled.txt rename to example/gencode_downsampled.EXAMPLE_ONLY_DONT_REUSE.txt diff --git a/example/run.R b/example/run.R index 9b96fea2..716fd106 100755 --- a/example/run.R +++ b/example/run.R @@ -1,12 +1,14 @@ #!/usr/bin/env Rscript +options(error = function() traceback(2)) + library("infercnv") # create the infercnv object -infercnv_obj = CreateInfercnvObject(raw_counts_matrix="oligodendroglioma_expression_downsampled.counts.matrix", - annotations_file="oligodendroglioma_annotations_downsampled.txt", +infercnv_obj = CreateInfercnvObject(raw_counts_matrix="../inst/extdata/oligodendroglioma_expression_downsampled.counts.matrix.gz", + annotations_file="../inst/extdata/oligodendroglioma_annotations_downsampled.txt", delim="\t", - gene_order_file="gencode_downsampled.txt", + gene_order_file="../inst/extdata/gencode_downsampled.EXAMPLE_ONLY_DONT_REUSE.txt", ref_group_names=c("Microglia/Macrophage","Oligodendrocytes (non-malignant)")) out_dir="output_dir" @@ -14,9 +16,9 @@ out_dir="output_dir" infercnv_obj = infercnv::run(infercnv_obj, cutoff=1, # cutoff=1 works well for Smart-seq2, and cutoff=0.1 works well for 10x Genomics out_dir=out_dir, - cluster_by_groups=T, - plot_steps=F, - mask_nonDE_genes = T, - include.spike=T # used for final scaling to fit range (0,2) centered at 1. 
+ cluster_by_groups=TRUE, + plot_steps=FALSE, + denoise=TRUE, + HMM=TRUE ) diff --git a/inferCNV.wiki b/inferCNV.wiki index 2ee46786..f664e663 160000 --- a/inferCNV.wiki +++ b/inferCNV.wiki @@ -1 +1 @@ -Subproject commit 2ee467860418e4468e3a5fb01760dd635b18fde3 +Subproject commit f664e663b42d2b1e6e657c668b2c8a78c9de4576 diff --git a/inst/BUGS_Mixture_Model b/inst/BUGS_Mixture_Model new file mode 100755 index 00000000..12f61722 --- /dev/null +++ b/inst/BUGS_Mixture_Model @@ -0,0 +1,57 @@ +model { + ## Likelihood + ## Single cell + for( j in 1:C ) { # for each cell line K + for ( i in 1:G ) { # for each gene j + ## Likelihood + ## generative distribution of the observed data + gexp[i, j] ~ dnorm(mu.1[j], tau.1[j]) + } + + ## mu and tau are Fixed Effects dependent on the cluster/state assignment + ## result in cluster specific means and precision + ## gamma is group specific Random Effect + + mu.1[j] <- mu[1] * (equals(epsilon[j], 1 )) + + mu[2] * (equals(epsilon[j], 2 )) + + mu[3] * (equals(epsilon[j], 3 )) + + mu[4] * (equals(epsilon[j], 4 )) + + mu[5] * (equals(epsilon[j], 5 )) + + mu[6] * (equals(epsilon[j], 6 )) + + tau.1[j] <- sig[1] * (equals( epsilon[j], 1 )) + + sig[2] * (equals( epsilon[j], 2 )) + + sig[3] * (equals( epsilon[j], 3 )) + + sig[4] * (equals( epsilon[j], 4 )) + + sig[5] * (equals( epsilon[j], 5 )) + + sig[6] * (equals( epsilon[j], 6 )) + + # PRIOR + ## Epsilons hold our cluster/state assignment + ## theta are the mixture probabilities for states + ## cell specific + + epsilon[j] ~ dcat(theta[]) + } + + + # HYPERPARAMETERS + ## hyperparameter for for gamma, a flat gamma distribution + sigma ~ dgamma(1,1) + + # dirchlet with equal probabilities for each state, equals to a uniform + # provides the probabilities distribution of states + # alpha can be 1 or (1/nubmer of states) + + + ## Hyperparameter for epsilon, + ## This is the mixing property! 
+ + theta[1:6] ~ ddirich(alpha[]) + + # HYPERHYPERPARAMETER + + for(i in 1:6){ + alpha[i] <- 1 + } +} \ No newline at end of file diff --git a/inst/CITATION b/inst/CITATION index 1999b249..74cbc39e 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -1,18 +1,34 @@ citHeader("To cite the inferCNV package in publications, please use:") -citEntry(entry = "article", - title = "", +citEntry(entry = "manual", + title = "inferCNV of the Trinity CTAT Project.", author = personList( - person(), - person() + person("Timothy", "Tickle", email = "ttickle@broadinstitute.org", role = c("aut", "cre")), + person("Itay", "Tirosh", email = "tirosh@broadinstitute.org", role = "aut"), + person("Christophe", "Georgescu", email = "cgeorges@broadinstitute.org", role = "aut"), + person("Maxwell", "Brown", email = "mbrown@broadinstitute.org", role = "aut"), + person("Brian", "Haas", email = "bhaas@broadinstitute.org", role = "aut") ), - journal = "", - year = "2018", - volume = "", - pages = "", - textVersion = paste("authors", "title", "journal") + organization = "Klarman Cell Observatory, Broad Institute of MIT and Harvard", + address = "Cambridge, MA, USA", + year = 2019, + url = "https://github.com/broadinstitute/inferCNV", + textVersion = "inferCNV of the Trinity CTAT Project. 
https://github.com/broadinstitute/inferCNV" ) +#citEntry(entry = "article", +# title = "", +# author = personList( +# person(), +# person() +# ), +# journal = "", +# year = "2018", +# volume = "", +# pages = "", +# textVersion = paste("authors", "title", "journal") +#) + diff --git a/inst/NEWS b/inst/NEWS new file mode 100644 index 00000000..f6e1f21f --- /dev/null +++ b/inst/NEWS @@ -0,0 +1,2 @@ +Changes in version 0.99.0 (2019-03-15) ++ Submitted to Bioconductor diff --git a/inst/extdata/gencode_downsampled.EXAMPLE_ONLY_DONT_REUSE.txt b/inst/extdata/gencode_downsampled.EXAMPLE_ONLY_DONT_REUSE.txt new file mode 100644 index 00000000..b6f64588 --- /dev/null +++ b/inst/extdata/gencode_downsampled.EXAMPLE_ONLY_DONT_REUSE.txt @@ -0,0 +1,10338 @@ +WASH7P chr1 14363 29806 +LINC00115 chr1 761586 762902 +NOC2L chr1 879584 894689 +MIR200A chr1 1103243 1103332 +SDF4 chr1 1152288 1167411 +UBE2J2 chr1 1189289 1209265 +SCNN1D chr1 1215816 1227409 +ACAP3 chr1 1227756 1244989 +PUSL1 chr1 1243947 1247057 +CPSF3L chr1 1246965 1260071 +AURKAIP1 chr1 1309110 1310875 +CCNL2 chr1 1321091 1334708 +MRPL20 chr1 1337288 1342693 +ANKRD65 chr1 1353800 1357149 +ATAD3B chr1 1407143 1433228 +SSU72 chr1 1477053 1510249 +MMP23B chr1 1567474 1570639 +SLC35E2B chr1 1592939 1624167 +SLC35E2 chr1 1656277 1677431 +GNB1 chr1 1716729 1822495 +GABRD chr1 1950780 1962192 +PRKCZ chr1 1981909 2116834 +C1orf86 chr1 2115903 2144159 +RER1 chr1 2323267 2336883 +PEX10 chr1 2336236 2345236 +PANK4 chr1 2439972 2458039 +TNFRSF14 chr1 2487078 2496821 +FAM213B chr1 2517930 2522908 +WRAP73 chr1 3547331 3569325 +LRRC47 chr1 3696784 3713068 +DFFB chr1 3773845 3801993 +C1orf174 chr1 3805689 3816857 +MIR4417 chr1 5624131 5624203 +RPL22 chr1 6241329 6269449 +ICMT chr1 6281253 6296032 +ACOT7 chr1 6324329 6454451 +NOL9 chr1 6581407 6614595 +KLHL21 chr1 6650784 6674667 +PHF13 chr1 6673745 6684093 +THAP3 chr1 6684926 6695646 +DNAJC11 chr1 6694228 6761984 +CAMTA1 chr1 6845384 7829766 +VAMP3 chr1 7831329 7841492 +PARK7 chr1 
8014351 8045565 +ERRFI1 chr1 8064464 8086368 +SLC45A1 chr1 8377886 8404227 +RERE chr1 8412457 8877702 +ENO1 chr1 8921061 8939308 +SLC2A5 chr1 9095166 9148537 +SPSB1 chr1 9352939 9429591 +CLSTN1 chr1 9789084 9884584 +LZIC chr1 9982173 10003465 +NMNAT1 chr1 10003486 10045559 +UBE4B chr1 10092890 10241297 +KIF1B chr1 10270863 10441661 +PGD chr1 10458649 10480201 +DFFA chr1 10516579 10532583 +TARDBP chr1 11072414 11085796 +SRM chr1 11114641 11120081 +EXOSC10 chr1 11126675 11159938 +MTOR chr1 11166592 11322564 +UBIAD1 chr1 11333263 11356106 +PTCHD2 chr1 11539223 11597641 +FBXO44 chr1 11714432 11723384 +FBXO6 chr1 11724181 11734411 +MAD2L2 chr1 11734537 11751707 +AGTRAP chr1 11796141 11814859 +CLCN6 chr1 11866207 11903201 +NPPA chr1 11905766 11908402 +RNU5E-1 chr1 11968209 11968328 +PLOD1 chr1 11994262 12035595 +MFN2 chr1 12040238 12073571 +MIIP chr1 12079523 12092102 +TNFRSF1B chr1 12227060 12269285 +DHRS3 chr1 12627939 12677737 +PRAMEF10 chr1 12952727 12958101 +PRAMEF16 chr1 13495254 13498260 +PDPN chr1 13909960 13944452 +PRDM2 chr1 14026693 14151574 +KAZN chr1 14925200 15444539 +CASP9 chr1 15817327 15853029 +DNAJC16 chr1 15853308 15918874 +FBLIM1 chr1 16083102 16113089 +SPEN chr1 16174359 16266955 +ZBTB17 chr1 16268364 16302627 +FBXO42 chr1 16573334 16678949 +SZRD1 chr1 16679070 16724640 +NECAP2 chr1 16767167 16786573 +NBPF1 chr1 16888814 16940057 +CROCCP2 chr1 16944751 16971178 +MIR3675 chr1 17185444 17185516 +SDHB chr1 17345217 17380665 +PADI2 chr1 17393256 17445948 +RCC2 chr1 17733256 17766220 +ARHGEF10L chr1 17866330 18024369 +IGSF21 chr1 18434240 18704977 +UBR4 chr1 19401000 19536770 +EMC1 chr1 19542158 19578046 +MRTO4 chr1 19578033 19586622 +AKR7A2 chr1 19630459 19638640 +PQLC2 chr1 19638820 19655794 +CAPZB chr1 19665267 19812066 +MINOS1 chr1 19923477 19956314 +TMCO4 chr1 20008706 20126438 +VWA5B1 chr1 20617412 20681387 +CAMK2N1 chr1 20808884 20812713 +MUL1 chr1 20825943 20834654 +DDOST chr1 20978270 20988000 +HP1BP3 chr1 21069154 21113816 +EIF4G3 chr1 21132963 
21503377 +RAP1GAP chr1 21922708 21995801 +USP48 chr1 22004791 22110099 +CELA3A chr1 22328149 22339032 +LINC00339 chr1 22351681 22357716 +CDC42 chr1 22379120 22419437 +C1QA chr1 22962999 22966101 +C1QC chr1 22970123 22974603 +C1QB chr1 22979255 22988031 +MIR4684 chr1 23046010 23046091 +HNRNPR chr1 23630264 23670829 +ZNF436 chr1 23685941 23695935 +C1orf213 chr1 23695490 23698332 +ASAP3 chr1 23755056 23811061 +ID3 chr1 23884409 23886285 +RPL11 chr1 24018269 24022915 +TCEB3 chr1 24069645 24088549 +PITHD1 chr1 24104895 24114722 +LYPLA2 chr1 24117460 24122029 +GALE chr1 24122089 24127271 +HMGCL chr1 24128375 24165110 +FUCA1 chr1 24171567 24194784 +PNRC2 chr1 24285599 24289952 +SRSF10 chr1 24291294 24307417 +NIPAL3 chr1 24742284 24799466 +SRRM1 chr1 24958207 24999758 +CLIC4 chr1 25071848 25170815 +SYF2 chr1 25549170 25558993 +C1orf63 chr1 25568728 25664704 +TMEM50A chr1 25664408 25688852 +RHCE chr1 25688740 25756683 +MTFR1L chr1 26145131 26159432 +STMN1 chr1 26210672 26233482 +PAFAH2 chr1 26286258 26324648 +SCARNA17 chr1 26332497 26332621 +PDIK1L chr1 26437664 26452034 +ZNF593 chr1 26496362 26498551 +CATSPER4 chr1 26517052 26529459 +SH3BGRL3 chr1 26605667 26608007 +DHDDS chr1 26758773 26797785 +HMGN2 chr1 26798941 26802463 +RPS6KA1 chr1 26856252 26901521 +ARID1A chr1 27022524 27108595 +PIGV chr1 27113963 27124889 +GPN2 chr1 27202624 27216788 +GPATCH3 chr1 27216979 27226957 +NUDC chr1 27226729 27273353 +C1orf172 chr1 27276053 27286897 +TMEM222 chr1 27648651 27662891 +GPR3 chr1 27719148 27722318 +WASF2 chr1 27730730 27816669 +IFI6 chr1 27992572 27998729 +FAM76A chr1 28052490 28089633 +STX12 chr1 28099694 28150963 +THEMIS2 chr1 28199055 28213196 +RPA2 chr1 28218035 28241257 +EYA3 chr1 28296855 28415207 +PTAFR chr1 28473677 28520447 +DNAJC8 chr1 28525967 28559536 +ATPIF1 chr1 28562620 28573417 +SESN2 chr1 28586038 28609002 +MED18 chr1 28655513 28662476 +PHACTR4 chr1 28696114 28826881 +RCC1 chr1 28832455 28865812 +SNHG3 chr1 28832492 28837404 +TRNAU1AP chr1 28879597 28905051 
+SNHG12 chr1 28905050 28909495 +TAF12 chr1 28915835 28969597 +RAB42 chr1 28918712 28921955 +GMEB1 chr1 28995244 29045865 +YTHDF2 chr1 29063133 29096287 +OPRD1 chr1 29138654 29190208 +SRSF4 chr1 29474255 29508499 +MECR chr1 29519385 29557454 +LAPTM5 chr1 31205316 31230667 +SDC3 chr1 31342314 31381608 +PUM1 chr1 31404353 31538838 +SNRNP40 chr1 31732417 31769662 +ZCCHC17 chr1 31769842 31837783 +TINAGL1 chr1 32042116 32053288 +PEF1 chr1 32095463 32110497 +COL16A1 chr1 32117848 32169920 +BAI2 chr1 32192718 32230494 +PTP4A2 chr1 32372022 32410457 +KHDRBS1 chr1 32479430 32526451 +TMEM39B chr1 32537632 32568467 +KPNA6 chr1 32573639 32642169 +TXLNA chr1 32645287 32663886 +CCDC28B chr1 32665987 32670988 +TMEM234 chr1 32680073 32687972 +EIF3I chr1 32687529 32697205 +MTMR9LP chr1 32697259 32707221 +HDAC1 chr1 32757687 32799236 +MARCKSL1 chr1 32799433 32801980 +BSDC1 chr1 32830704 32860332 +ZBTB8A chr1 33005028 33071540 +ZBTB8OS chr1 33065773 33116504 +RBBP4 chr1 33116743 33151812 +KIAA1522 chr1 33207486 33240571 +YARS chr1 33240840 33283754 +S100PBP chr1 33282368 33324476 +AK2 chr1 33473585 33546597 +ADC chr1 33546705 33586131 +ZNF362 chr1 33722146 33766320 +PHC2 chr1 33789224 33896653 +SMIM12 chr1 35178338 35325417 +GJB3 chr1 35246790 35251970 +ZMYM6NB chr1 35447136 35450954 +ZMYM1 chr1 35525387 35581460 +SFPQ chr1 35641979 35658749 +ZMYM4 chr1 35734568 35887659 +KIAA0319L chr1 35899091 36023551 +NCDN chr1 36023074 36032875 +PSMB2 chr1 36067185 36107445 +C1orf216 chr1 36179476 36185073 +AGO3 chr1 36396319 36538101 +ADPRHL2 chr1 36554476 36559533 +COL8A2 chr1 36560837 36590821 +TRAPPC3 chr1 36602173 36615098 +THRAP3 chr1 36690017 36770958 +LSM10 chr1 36856839 36863493 +MRPS15 chr1 36921319 36930038 +CSF3R chr1 36931644 36948879 +GRIK3 chr1 37261128 37499730 +MEAF6 chr1 37958176 37980375 +SNIP1 chr1 38000050 38019903 +GNL2 chr1 38032417 38061536 +C1orf109 chr1 38147242 38157914 +MANEAL chr1 38259474 38266809 +C1orf122 chr1 38272651 38275126 +INPP5B chr1 38326369 38412729 
+SNORA63 chr1 38349909 38349989 +SF3A3 chr1 38422647 38456593 +FHL3 chr1 38462442 38471278 +UTP11L chr1 38474930 38490496 +RRAGC chr1 39303870 39325495 +AKIRIN1 chr1 39456895 39471731 +NDUFS5 chr1 39491990 39500308 +MACF1 chr1 39546988 39952849 +PPIEL chr1 39997510 40024379 +PABPC4 chr1 40026488 40042462 +PPIE chr1 40157854 40229586 +BMP8B chr1 40222854 40254533 +TRIT1 chr1 40306723 40349183 +CAP1 chr1 40505905 40538321 +PPT1 chr1 40538379 40563375 +RLF chr1 40627045 40706593 +ZMPSTE24 chr1 40723779 40759856 +COL9A2 chr1 40766159 40783488 +SMAP2 chr1 40810522 40888998 +NFYC chr1 41157320 41237275 +CITED4 chr1 41326729 41328018 +CTPS1 chr1 41445007 41478235 +SCMH1 chr1 41492872 41707826 +FOXJ3 chr1 42642210 42801548 +ZMYND12 chr1 42896000 42921938 +PPIH chr1 43124096 43142429 +YBX1 chr1 43148098 43168020 +LEPRE1 chr1 43212006 43232755 +C1orf50 chr1 43232940 43263968 +CCDC23 chr1 43272723 43282954 +ZNF691 chr1 43312280 43318148 +SLC2A1 chr1 43391052 43424530 +EBNA1BP2 chr1 43629846 43736607 +TMEM125 chr1 43735665 43739673 +CDC20 chr1 43824626 43828874 +ELOVL1 chr1 43829068 43833696 +MED8 chr1 43849588 43855479 +SZT2 chr1 43855553 43918321 +HYI chr1 43916824 43919660 +KDM4A chr1 44115829 44171186 +IPO13 chr1 44412611 44433694 +DPH2 chr1 44435672 44439041 +ATP6V0B chr1 44440159 44443967 +SLC6A9 chr1 44457172 44497139 +DMAP1 chr1 44679127 44686353 +ERI3 chr1 44686742 44820932 +RNF220 chr1 44870866 45117396 +MIR5584 chr1 45011165 45011224 +TMEM53 chr1 45100910 45140227 +RPS8 chr1 45240923 45244451 +SNORD38B chr1 45244062 45244128 +PLK3 chr1 45265897 45271662 +EIF2B3 chr1 45316450 45452282 +HECTD3 chr1 45468212 45477001 +UROD chr1 45477819 45481247 +HPDL chr1 45792545 45794347 +MUTYH chr1 45794835 45806142 +TOE1 chr1 45805342 45809647 +PRDX1 chr1 45976708 45988719 +AKR1A1 chr1 46016215 46035721 +NASP chr1 46049518 46084566 +GPBP1L1 chr1 46092976 46153785 +TMEM69 chr1 46152886 46160115 +IPP chr1 46159996 46216322 +PIK3R3 chr1 46505812 46642160 +POMGNT1 chr1 46654354 
46685977 +LRRC41 chr1 46726868 46769280 +UQCRH chr1 46769303 46782448 +NSUN4 chr1 46805849 46830824 +FAAH chr1 46859937 46879520 +MKNK1 chr1 47023090 47082515 +ATPAF1 chr1 47098409 47139539 +EFCAB14 chr1 47124366 47184824 +PDZK1IP1 chr1 47649265 47656716 +CMPK1 chr1 47799469 47844511 +SPATA6 chr1 48761044 48937845 +BEND5 chr1 49193195 49242641 +ELAVL4 chr1 50513686 50669458 +CDKN2C chr1 51426417 51440305 +RNF11 chr1 51701943 51739127 +EPS15 chr1 51819935 51985000 +OSBPL9 chr1 52042851 52254889 +NRD1 chr1 52254863 52344477 +TXNDC12 chr1 52485803 52521843 +KTI12 chr1 52497775 52499488 +BTF3L4 chr1 52521797 52556388 +ZFYVE9 chr1 52608046 52812358 +ORC1 chr1 52838501 52870131 +PRPF38A chr1 52870236 52886511 +ZCCHC11 chr1 52873954 53019159 +GPX7 chr1 53068044 53074723 +SELRC1 chr1 53152508 53164038 +ZYG11B chr1 53192126 53293014 +SCP2 chr1 53392901 53517375 +MIR5095 chr1 53400602 53400689 +CPT2 chr1 53662101 53679869 +C1orf123 chr1 53679771 53686289 +MAGOH chr1 53692564 53704282 +LRP8 chr1 53711217 53793742 +YIPF1 chr1 54317392 54356407 +HSPB11 chr1 54382247 54411602 +LRRC42 chr1 54411750 54433841 +TMEM59 chr1 54497347 54519177 +MRPL37 chr1 54649714 54691137 +TTC4 chr1 55181495 55208330 +DHCR24 chr1 55315306 55352891 +TMEM61 chr1 55446465 55457966 +USP24 chr1 55532032 55680786 +PPAP2B chr1 56960419 57110974 +DAB1 chr1 57460451 59012406 +OMA1 chr1 58881056 59012474 +JUN chr1 59246465 59249785 +NFIA chr1 61330931 61928465 +TM2D1 chr1 62146718 62191095 +USP1 chr1 62901968 62917475 +ATG4C chr1 63249806 63331184 +FOXD3 chr1 63788730 63790797 +ALG6 chr1 63833261 63904233 +ITGB3BP chr1 63906441 64059392 +EFCAB7 chr1 63989043 64038364 +PGM1 chr1 64058947 64125916 +RAVER2 chr1 65210778 65298915 +JAK1 chr1 65298912 65432187 +AK4 chr1 65613232 65697828 +DNAJC6 chr1 65713902 65881552 +LEPROT chr1 65886270 65901690 +PDE4B chr1 66258197 66840259 +MIR3117 chr1 67094123 67094200 +MIER1 chr1 67390578 67454302 +SERBP1 chr1 67873493 67896098 +GADD45A chr1 68150744 68154021 +WLS chr1 
68564142 68698803 +LRRC40 chr1 70610488 70671303 +SRSF11 chr1 70671365 70718735 +ANKRD13C chr1 70726271 70820417 +HHLA3 chr1 70820488 70851022 +CTH chr1 70876901 70905534 +ZRANB2 chr1 71528974 71546980 +NEGR1 chr1 71861623 72748417 +CRYZ chr1 75171170 75199092 +TYW3 chr1 75198836 75232362 +SLC44A5 chr1 75667816 76076801 +ACADM chr1 76190036 76253260 +RABGGTB chr1 76251879 76260764 +ST6GALNAC3 chr1 76540404 77100286 +PIGK chr1 77554675 77685115 +AK5 chr1 77747736 78025651 +ZZZ3 chr1 78028101 78149104 +USP33 chr1 78161672 78225537 +FAM73A chr1 78245309 78344106 +FUBP1 chr1 78409740 78444794 +DNAJB4 chr1 78444859 78483648 +IFI44L chr1 79085607 79111830 +IFI44 chr1 79115481 79129763 +TTLL7 chr1 84330711 84464833 +PRKACB chr1 84543745 84704181 +RPF1 chr1 84944942 84963473 +GNG5 chr1 84964008 84972248 +C1orf52 chr1 85715639 85725355 +BCL10 chr1 85731931 85742773 +DDAH1 chr1 85784164 86043933 +ZNHIT6 chr1 86115106 86174116 +ODF2L chr1 86814344 86862023 +CLCA4 chr1 87012761 87046437 +SH3GLB1 chr1 87170259 87213867 +SEP15 chr1 87328132 87380107 +PKN2 chr1 89149905 89301938 +GTF2B chr1 89318615 89357627 +CCBL2 chr1 89401456 89458636 +RBMXL1 chr1 89445139 89458643 +GBP3 chr1 89472349 89488577 +LRRC8D chr1 90286573 90402170 +ZNF326 chr1 90460671 90501090 +ZNF644 chr1 91380859 91487829 +CDC7 chr1 91966408 91991321 +BTBD8 chr1 92545862 92613393 +GLMN chr1 92711959 92764544 +RPAP2 chr1 92764522 92867613 +RPL5 chr1 93297582 93307481 +FAM69A chr1 93307724 93427057 +MTF2 chr1 93544792 93604638 +TMED5 chr1 93615299 93646285 +DR1 chr1 93811445 93835055 +DNTTIP2 chr1 94333373 94345474 +ABCA4 chr1 94458393 94586688 +ABCD3 chr1 94883933 94984222 +F3 chr1 94994781 95007356 +CNN3 chr1 95362507 95392834 +ALG14 chr1 95439963 95538501 +RWDD3 chr1 95699711 95712781 +PTBP2 chr1 97187221 97289294 +DPYD chr1 97543299 98386605 +SNX7 chr1 99127236 99226056 +SLC35A3 chr1 100435345 100492535 +HIAT1 chr1 100503653 100548933 +RTCA chr1 100731763 100758325 +EXTL2 chr1 101337943 101361554 +SLC30A7 chr1 
101361632 101447309 +DPH5 chr1 101455179 101491644 +RNPC3 chr1 104068313 104097861 +AMY2B chr1 104096437 104122156 +ACTG1P4 chr1 104112175 104113296 +PRMT6 chr1 107599267 107601916 +NBPF4 chr1 108765963 108786689 +PRPF38B chr1 109234945 109244425 +STXBP3 chr1 109289296 109352148 +GPSM2 chr1 109417972 109477167 +CLCC1 chr1 109472130 109506111 +WDR47 chr1 109512836 109584850 +TMEM167B chr1 109632425 109639556 +SARS chr1 109756540 109780791 +CELSR2 chr1 109792641 109818372 +PSRC1 chr1 109822178 109825808 +SORT1 chr1 109852192 109940573 +PSMA5 chr1 109941653 109969062 +ATXN7L2 chr1 110026101 110035426 +GNAI3 chr1 110091233 110136975 +AMPD2 chr1 110158726 110174673 +GSTM4 chr1 110198703 110208118 +GSTM2 chr1 110210644 110252171 +GSTM1 chr1 110230436 110251661 +GSTM3 chr1 110276554 110284384 +CSF1 chr1 110452864 110473614 +AHCYL1 chr1 110527308 110566357 +STRIP1 chr1 110574199 110617263 +ALX3 chr1 110602616 110613322 +RBM15 chr1 110881128 110889299 +LAMTOR5 chr1 110943871 110950564 +KCNA10 chr1 111059839 111061797 +KCNA2 chr1 111136202 111174096 +CD53 chr1 111415775 111442550 +LRIF1 chr1 111489807 111506701 +DRAM2 chr1 111659955 111682838 +CEPT1 chr1 111682249 111727724 +CHIA chr1 111833484 111863188 +WDR77 chr1 111982512 111991998 +ATP5F1 chr1 111991486 112005395 +ADORA3 chr1 112025970 112106584 +RAP1A chr1 112084840 112259313 +FAM212B chr1 112223252 112298446 +DDX20 chr1 112297867 112310638 +CTTNBP2NL chr1 112938803 113006078 +ST7L chr1 113066140 113163447 +CAPZA1 chr1 113161795 113214241 +RHOC chr1 113243728 113250056 +SLC16A1 chr1 113454469 113499635 +LRIG2 chr1 113615831 113674882 +AP4B1 chr1 114437370 114447823 +DCLRE1B chr1 114447763 114456708 +HIPK1 chr1 114471814 114520426 +OLFML3 chr1 114522063 114524876 +SYT6 chr1 114631913 114696541 +BCAS2 chr1 115110178 115124260 +NRAS chr1 115247090 115259515 +CSDE1 chr1 115259534 115301297 +SIKE1 chr1 115312100 115323306 +MAB21L3 chr1 116654376 116677861 +ATP1A1 chr1 116915290 116952883 +ATP1A1OS chr1 116934869 116961244 
+CD58 chr1 117057157 117113661 +PTGFRN chr1 117452679 117532980 +TTF2 chr1 117602925 117650075 +GDAP2 chr1 118406107 118472253 +WDR3 chr1 118472343 118509165 +WARS2 chr1 119573839 119683294 +PHGDH chr1 120202421 120286838 +NBPF7 chr1 120377388 120387779 +NOTCH2 chr1 120454176 120612240 +FCGR1B chr1 120926979 120935937 +SRGAP2C chr1 121107124 121129949 +FAM72D chr1 143896452 143913143 +LINC00623 chr1 144275918 144341756 +PFN1P2 chr1 144612266 144612683 +PDE4DIP chr1 144836157 145076186 +SEC22B chr1 145096220 145116922 +NUDT4P1 chr1 145139025 145139569 +NOTCH2NL chr1 145209119 145291972 +NBPF10 chr1 145289772 145370303 +TXNIP chr1 145438469 145442635 +POLR3GL chr1 145456236 145470388 +LIX1L chr1 145477085 145501669 +RBM8A chr1 145507598 145513536 +GNRHR2 chr1 145509752 145516076 +PEX11B chr1 145516252 145523730 +ANKRD35 chr1 145549230 145568526 +PIAS3 chr1 145575233 145586546 +POLR3C chr1 145592605 145611025 +RNF115 chr1 145611036 145696218 +GPR89A chr1 145764411 145827103 +GPR89C chr1 145883868 145924373 +NBPF11 chr1 146032647 146082765 +PRKAB2 chr1 146626685 146644129 +CHD1L chr1 146714291 146767443 +BCL9 chr1 147013182 147098017 +ACP6 chr1 147101453 147142618 +GJA8 chr1 147374946 147381393 +GPR89B chr1 147400506 147465753 +NBPF24 chr1 147574428 147634886 +NBPF14 chr1 148003642 148025863 +NBPF15 chr1 148555979 148596267 +NBPF16 chr1 148739443 148758311 +FCGR1C chr1 149369294 149378365 +FCGR1A chr1 149754227 149764074 +HIST2H2BF chr1 149754245 149783928 +HIST2H4A chr1 149804221 149811339 +HIST2H2AA3 chr1 149813505 149814478 +HIST2H2AA4 chr1 149822643 149823191 +HIST2H3A chr1 149824181 149825836 +HIST2H4B chr1 149825607 149832714 +HIST2H2BE chr1 149856010 149858232 +BOLA1 chr1 149859440 149872351 +SV2A chr1 149874870 149889434 +SF3B4 chr1 149895209 149900236 +VPS45 chr1 150039369 150117505 +PLEKHO1 chr1 150121373 150136916 +ANP32E chr1 150190717 150208504 +CA14 chr1 150229554 150237478 +APH1A chr1 150237804 150241980 +C1orf54 chr1 150240600 150253327 +C1orf51 chr1 
150254953 150259505 +MRPS21 chr1 150266289 150281414 +PRPF3 chr1 150293925 150325671 +RPRD2 chr1 150335567 150449042 +TARS2 chr1 150459887 150480078 +MCL1 chr1 150547032 150552066 +ENSA chr1 150573327 150602088 +GOLPH3L chr1 150618701 150669630 +CTSS chr1 150702672 150738433 +CTSK chr1 150768684 150780799 +ARNT chr1 150782181 150849244 +SETDB1 chr1 150898739 150937213 +CERS2 chr1 150933059 150947479 +FAM63A chr1 150969025 150980851 +PRUNE chr1 150980896 151008189 +C1orf56 chr1 151020216 151024462 +CDC42SE1 chr1 151023447 151042801 +MLLT11 chr1 151030234 151040970 +TNFAIP8L2 chr1 151129105 151132225 +SCNM1 chr1 151129140 151142773 +LYSMD1 chr1 151132224 151138424 +VPS72 chr1 151142463 151167797 +PIP5K1A chr1 151170425 151222012 +PSMD4 chr1 151227179 151239955 +ZNF687 chr1 151254094 151264656 +PI4KB chr1 151264273 151300191 +RFX5 chr1 151313116 151319833 +SELENBP1 chr1 151336778 151345209 +PSMB4 chr1 151372010 151374420 +POGZ chr1 151375200 151431941 +MIR554 chr1 151518272 151518367 +SNX27 chr1 151584541 151671567 +MRPL9 chr1 151732119 151736040 +TDRKH chr1 151742583 151763892 +THEM5 chr1 151819739 151826173 +THEM4 chr1 151846060 151882284 +S100A11 chr1 152004982 152020383 +CRNN chr1 152381719 152386739 +LCE2B chr1 152647771 152659877 +LCE1A chr1 152799949 152800573 +SPRR2A chr1 153028589 153030013 +PGLYRP4 chr1 153302596 153321316 +S100A6 chr1 153507075 153508720 +S100A3 chr1 153519805 153521848 +S100A16 chr1 153579362 153585621 +S100A13 chr1 153591263 153606568 +CHTOP chr1 153606525 153618782 +SNAPIN chr1 153631130 153634306 +ILF2 chr1 153634512 153643524 +INTS3 chr1 153700543 153746555 +GATAD2B chr1 153777201 153895451 +DENND4B chr1 153901977 153919172 +CRTC2 chr1 153920145 153931101 +SLC39A1 chr1 153931575 153940188 +CREB3L4 chr1 153940010 153946839 +JTB chr1 153946745 153950164 +RAB13 chr1 153954127 153958834 +RPS27 chr1 153963235 153964626 +NUP210L chr1 153965161 154127592 +TPM3 chr1 154127784 154167124 +C1orf43 chr1 154179182 154193104 +UBAP2L chr1 154192655 
154243986 +HAX1 chr1 154244987 154248351 +ATP8B2 chr1 154298029 154323783 +CHRNB2 chr1 154540257 154552502 +ADAR chr1 154554538 154600475 +PMVK chr1 154897210 154909467 +PYGO2 chr1 154929502 154936329 +SHC1 chr1 154934774 154946871 +CKS1B chr1 154947129 154951725 +FLAD1 chr1 154955814 154965587 +ADAM15 chr1 155023042 155035252 +EFNA4 chr1 155036207 155042029 +EFNA1 chr1 155099936 155107333 +SLC50A1 chr1 155107820 155111329 +DPM3 chr1 155112367 155113071 +KRTCAP2 chr1 155141884 155145951 +MTX1 chr1 155178490 155183615 +GBAP1 chr1 155183616 155197214 +GBA chr1 155204243 155214490 +SCAMP3 chr1 155225770 155232221 +CLK2 chr1 155232659 155248282 +FDPS chr1 155278539 155290457 +RUSC1 chr1 155290687 155300905 +ASH1L chr1 155305059 155532598 +MSTO1 chr1 155579979 155718153 +MSTO2P chr1 155581011 155720105 +YY1AP1 chr1 155629237 155658791 +DAP3 chr1 155657751 155708801 +GON4L chr1 155719508 155829191 +SYT11 chr1 155829300 155854990 +RIT1 chr1 155867599 155881195 +KIAA0907 chr1 155882834 155904191 +ARHGEF2 chr1 155916630 155976861 +SSR2 chr1 155978839 155990750 +UBQLN4 chr1 156005092 156023585 +LAMTOR2 chr1 156024543 156028301 +MEX3A chr1 156041804 156051789 +LMNA chr1 156052364 156109880 +SLC25A44 chr1 156163880 156182587 +PMF1-BGLAP chr1 156182773 156213123 +PMF1 chr1 156182784 156212874 +PAQR6 chr1 156213206 156217881 +SMG5 chr1 156219015 156252620 +TMEM79 chr1 156252726 156262976 +C1orf85 chr1 156259880 156265463 +CCT3 chr1 156278759 156337664 +C1orf61 chr1 156374042 156400493 +MEF2D chr1 156433519 156470620 +APOA1BP chr1 156561554 156564091 +GPATCH4 chr1 156564279 156571288 +BCAN chr1 156611182 156629324 +ISG20L2 chr1 156691683 156698591 +RRNAD1 chr1 156698234 156706752 +MRPL24 chr1 156707095 156711382 +HDGF chr1 156711899 156736717 +PRCC chr1 156720402 156770607 +ETV3 chr1 157090983 157108266 +CYCSP52 chr1 157098154 157098463 +CD1C chr1 158259576 158263420 +OR10Z1 chr1 158576229 158577170 +MNDA chr1 158801107 158819296 +IFI16 chr1 158969758 159024945 +AIM2 chr1 
159032274 159116886 +CADM3 chr1 159141399 159173103 +FCRL6 chr1 159770301 159786041 +TAGLN2 chr1 159887897 159895522 +PIGM chr1 159997462 160001783 +KCNJ10 chr1 160007257 160040038 +IGSF8 chr1 160061130 160068733 +ATP1A2 chr1 160085549 160113381 +CASQ1 chr1 160160285 160171676 +PEA15 chr1 160175127 160185166 +DCAF8 chr1 160187254 160254920 +PEX19 chr1 160246602 160256138 +COPA chr1 160259063 160313190 +SUMO1P3 chr1 160287193 160287496 +NCSTN chr1 160313062 160328742 +VANGL2 chr1 160370376 160398468 +CD84 chr1 160510885 160549306 +CD244 chr1 160799950 160832692 +USF1 chr1 161009041 161015767 +ARHGAP30 chr1 161016736 161039760 +KLHDC9 chr1 161068151 161070136 +PFDN2 chr1 161070346 161087901 +NIT1 chr1 161087876 161095235 +DEDD chr1 161090764 161102478 +UFC1 chr1 161122566 161128646 +USP21 chr1 161129240 161135513 +PPOX chr1 161136200 161147803 +B4GALT3 chr1 161141100 161147287 +ADAMTS4 chr1 161154098 161168846 +NDUFS2 chr1 161166894 161184185 +FCER1G chr1 161185024 161190489 +TOMM40L chr1 161195793 161200408 +SDHC chr1 161284047 161332984 +FCGR2A chr1 161475220 161493803 +HSPA6 chr1 161494036 161496681 +FCGR3A chr1 161511549 161600917 +FCGR2B chr1 161551101 161648444 +DUSP12 chr1 161719548 161727028 +ATF6 chr1 161736084 161933860 +NOS1AP chr1 162039564 162353321 +UHMK1 chr1 162467041 162499419 +UAP1 chr1 162531323 162569627 +HSD17B7 chr1 162760492 162782607 +C1orf110 chr1 162794248 162838605 +PBX1 chr1 164524821 164868533 +MGST3 chr1 165600098 165631033 +ALDH9A1 chr1 165631453 165668100 +TMCO1 chr1 165696032 165796992 +UCK2 chr1 165796768 165880855 +POGK chr1 166808681 166825581 +TADA1 chr1 166825747 166845564 +ILDR2 chr1 166882443 166944719 +DUSP27 chr1 167063282 167098402 +POU2F1 chr1 167190066 167396582 +CREG1 chr1 167498914 167523004 +MPZL1 chr1 167690429 167761156 +MPC2 chr1 167885967 167906278 +DCAF6 chr1 167905021 168045081 +GPR161 chr1 168053997 168106821 +TIPRL chr1 168148171 168169950 +SFT2D2 chr1 168195176 168222263 +ANKRD36BP1 chr1 168214803 168216581 
+ATP1B1 chr1 169074935 169101960 +NME7 chr1 169101769 169337205 +BLZF1 chr1 169337208 169365778 +SLC19A2 chr1 169433147 169455241 +SCYL3 chr1 169818772 169863408 +KIFAP3 chr1 169890467 170054349 +GORAB chr1 170501270 170522587 +PRRX1 chr1 170631869 170708560 +FMO1 chr1 171217638 171255117 +PRRC2C chr1 171454651 171562650 +VAMP4 chr1 171669300 171711387 +METTL13 chr1 171750788 171783163 +DNM3 chr1 171810621 172387606 +MIR199A2 chr1 172113675 172113784 +PIGC chr1 172339329 172413230 +SUCO chr1 172501489 172580971 +TNFSF18 chr1 173009100 173020103 +PRDX6 chr1 173446405 173457946 +KLHL20 chr1 173684080 173755840 +DARS2 chr1 173793641 173827684 +GAS5 chr1 173833038 173838020 +ZBTB37 chr1 173837220 173872687 +RC3H1 chr1 173900352 173991435 +RABGAP1L chr1 174128548 174964445 +GPR52 chr1 174417212 174418683 +CACYBP chr1 174968300 174980851 +MRPS14 chr1 174979925 174992561 +TNR chr1 175284330 175712906 +RFWD2 chr1 175913967 176176629 +ASTN1 chr1 176826438 177134109 +MIR488 chr1 176998499 176998581 +RASAL2 chr1 178062864 178448644 +TOR3A chr1 179050512 179067158 +ABL2 chr1 179068462 179198819 +SNORA67 chr1 179165608 179165714 +SOAT1 chr1 179262925 179327815 +TOR1AIP2 chr1 179809102 179846938 +TOR1AIP1 chr1 179851177 179894135 +CEP350 chr1 179923873 180084015 +QSOX1 chr1 180123969 180173165 +ACBD6 chr1 180244515 180472089 +XPR1 chr1 180601140 180859387 +STX6 chr1 180941861 180992047 +CACNA1E chr1 181382238 181777219 +GLUL chr1 182350839 182361341 +RNASEL chr1 182542769 182558391 +RGS16 chr1 182567758 182573543 +NPL chr1 182758428 182799519 +DHX9 chr1 182808504 182856886 +NMNAT2 chr1 183217372 183387737 +SMG7 chr1 183441351 183567381 +ARPC5 chr1 183592401 183604892 +RGL1 chr1 183605220 183897665 +APOBEC4 chr1 183615541 183622451 +TSEN15 chr1 184020811 184043346 +C1orf21 chr1 184356192 184598154 +EDEM3 chr1 184659365 184724047 +RNF2 chr1 185014496 185071740 +TRMT1L chr1 185087220 185126204 +IVNS1ABP chr1 185265520 185286461 +PRG4 chr1 186265405 186283694 +TPR chr1 186280784 
186344825 +C1orf27 chr1 186344890 186390510 +OCLM chr1 186369704 186370587 +RGS1 chr1 192544857 192549161 +RGS13 chr1 192605275 192629390 +RGS2 chr1 192778169 192781403 +UCHL5 chr1 192981380 193029237 +TROVE2 chr1 193028552 193060907 +GLRX2 chr1 193065598 193075244 +CDC73 chr1 193091147 193223031 +B3GALT2 chr1 193148175 193155784 +MIR4735 chr1 196551543 196551611 +ZBTB41 chr1 197122810 197169672 +CRB1 chr1 197170592 197447585 +DENND1B chr1 197473878 197744826 +NEK7 chr1 198126093 198291550 +PTPRC chr1 198607801 198726545 +ZNF281 chr1 200374068 200379184 +DDX59 chr1 200593024 200639097 +CAMSAP2 chr1 200708686 200829832 +KIF21B chr1 200938520 200992828 +TMEM9 chr1 201103900 201140702 +IGFN1 chr1 201159953 201198080 +PHLDA3 chr1 201434620 201438365 +CSRP1 chr1 201452658 201478584 +NAV1 chr1 201592411 201796102 +MIR1231 chr1 201777739 201777830 +IPO9 chr1 201798269 201853422 +SHISA4 chr1 201857808 201861434 +TIMM17A chr1 201924619 201939789 +RNPEP chr1 201951500 201975275 +GPR37L1 chr1 202091986 202102720 +ARL8A chr1 202102532 202113869 +PTPRVP chr1 202137179 202158583 +PPP1R12B chr1 202317827 202561834 +SNORA70 chr1 202496438 202496555 +KDM5B chr1 202696526 202778598 +RABIF chr1 202848085 202858263 +KLHL12 chr1 202860228 202897764 +ADIPOR1 chr1 202909951 202927700 +CYB5R1 chr1 202930997 202936408 +TMEM183A chr1 202976514 202993976 +ADORA1 chr1 203059782 203136533 +BTG2 chr1 203274619 203278730 +PRELP chr1 203444956 203460480 +ATP2B4 chr1 203595689 203713209 +ZC3H11A chr1 203764782 203823252 +ZBED6 chr1 203765437 203769686 +SNRPE chr1 203830731 203839678 +ETNK2 chr1 204100190 204121307 +PPP1R15B chr1 204372515 204380919 +PIK3C2B chr1 204391756 204463852 +MDM4 chr1 204485511 204542871 +NFASC chr1 204797779 204991950 +CNTN2 chr1 205012325 205047627 +RBBP5 chr1 205055270 205091143 +DSTYK chr1 205111632 205180727 +TMCC2 chr1 205197304 205242471 +MIR135B chr1 205417430 205417526 +CDK18 chr1 205473723 205501921 +NUCKS1 chr1 205681947 205719404 +RAB7L1 chr1 205737114 
205744588 +SLC41A1 chr1 205758221 205782876 +SLC26A9 chr1 205882176 205912588 +SRGAP2 chr1 206516200 206637783 +EIF2D chr1 206744620 206785904 +DYRK3 chr1 206808881 206857764 +IL10 chr1 206940947 206945839 +YOD1 chr1 207217194 207226325 +CD55 chr1 207494853 207534311 +CD46 chr1 207925402 207968858 +PLXNA2 chr1 208195587 208417665 +G0S2 chr1 209848765 209849733 +HSD11B1 chr1 209859510 209908295 +DIEXF chr1 210001352 210030910 +SYT14 chr1 210111538 210337636 +RCOR3 chr1 211431719 211489727 +SLC30A1 chr1 211744910 211752084 +INTS7 chr1 212113741 212208884 +RPL21P28 chr1 212224829 212225371 +TMEM206 chr1 212537273 212588243 +NENF chr1 212606229 212619714 +ATF3 chr1 212738676 212794119 +NSL1 chr1 212899495 212965124 +TATDN3 chr1 212965170 212989968 +FLVCR1 chr1 213031597 213072705 +VASH2 chr1 213123862 213165379 +ANGEL2 chr1 213165524 213189168 +RPS6KC1 chr1 213224589 213448116 +PROX1 chr1 214156524 214214595 +SMYD2 chr1 214454445 214510474 +KCTD3 chr1 215740735 215795149 +ESRRG chr1 216676588 217311097 +GPATCH2 chr1 217600334 217804424 +RRP15 chr1 218458629 218511325 +LYPLAL1 chr1 219347186 219386207 +EPRS chr1 220141943 220220000 +BPNT1 chr1 220230824 220263804 +IARS2 chr1 220267444 220321380 +RAB3GAP2 chr1 220321635 220445796 +MARK1 chr1 220701568 220837803 +HLX chr1 221051699 221058401 +DUSP10 chr1 221874766 221915518 +TAF1A chr1 222731244 222763275 +MIA3 chr1 222791428 222841354 +AIDA chr1 222841355 222886552 +BROX chr1 222885895 222908538 +SUSD4 chr1 223394161 223537544 +CAPN2 chr1 223889295 223963720 +TP53BP2 chr1 223967601 224033674 +FBXO28 chr1 224301789 224349749 +DEGS1 chr1 224363458 224381143 +NVL chr1 224415036 224518089 +CNIH4 chr1 224544552 224567161 +WDR26 chr1 224572845 224624735 +LBR chr1 225589204 225616627 +ENAH chr1 225674537 225840844 +SRP9 chr1 225965531 225978168 +TMEM63A chr1 226033237 226070069 +PYCR2 chr1 226107578 226111978 +SDE2 chr1 226170403 226187032 +H3F3A chr1 226249552 226259702 +ACBD3 chr1 226332380 226374431 +PARP1 chr1 226548392 
226595780 +ITPKB chr1 226819391 226927024 +PSEN2 chr1 227057885 227083806 +ADCK3 chr1 227085237 227175246 +CDC42BPA chr1 227177566 227506175 +ZNF678 chr1 227751244 227865144 +SNAP47 chr1 227916240 227968927 +JMJD4 chr1 227918126 227923112 +WNT3A chr1 228194752 228248961 +ARF1 chr1 228270361 228286912 +C1orf35 chr1 228288427 228293112 +MRPL55 chr1 228294380 228297013 +GUK1 chr1 228327663 228336685 +TRIM11 chr1 228581374 228594541 +RNF187 chr1 228674762 228683467 +RHOU chr1 228870824 228882416 +RAB4A chr1 229406822 229441641 +NUP133 chr1 229577045 229644103 +TAF5L chr1 229728858 229761794 +GALNT2 chr1 230193536 230417870 +COG2 chr1 230778235 230829728 +AGT chr1 230838269 230850043 +C1orf198 chr1 230972865 231005335 +TTC13 chr1 231041989 231114621 +ARV1 chr1 231114727 231136341 +C1orf131 chr1 231359509 231376933 +GNPAT chr1 231376953 231413719 +EXOC8 chr1 231468480 231473598 +SPRTN chr1 231472850 231490769 +EGLN1 chr1 231499497 231560790 +TSNAX chr1 231664399 231702270 +DISC1 chr1 231762561 232177018 +MAP10 chr1 232940643 232946092 +NTPCR chr1 233086351 233119628 +PCNXL2 chr1 233119181 233431459 +KCNK1 chr1 233749750 233808258 +COA6 chr1 234509202 234519795 +TARBP1 chr1 234527059 234614849 +IRF2BP2 chr1 234740015 234745271 +LINC00184 chr1 234765057 234770526 +TOMM20 chr1 235272651 235292251 +RBM34 chr1 235294498 235324772 +ARID4B chr1 235294949 235491534 +GGPS1 chr1 235490665 235507847 +TBCE chr1 235530675 235612283 +B3GALNT2 chr1 235613238 235667781 +GNG4 chr1 235710987 235814054 +LYST chr1 235824341 236046940 +GPR137B chr1 236305832 236385165 +ACTN2 chr1 236849754 236927931 +MTR chr1 236958610 237067281 +CHRM3 chr1 239549865 240078750 +FH chr1 241660903 241683061 +KMO chr1 241695434 241758944 +MAP1LC3C chr1 242158792 242162375 +PLD5 chr1 242246288 242687998 +CEP170 chr1 243287730 243418650 +SDCCAG8 chr1 243419320 243663394 +AKT3 chr1 243651535 244014381 +ADSS chr1 244571796 244615436 +DESI2 chr1 244816237 244872335 +COX20 chr1 244998624 245008359 +HNRNPU-AS1 chr1 
245003940 245018799 +HNRNPU chr1 245014468 245027844 +EFCAB2 chr1 245133007 245290466 +KIF26B chr1 245318287 245872733 +SMYD3 chr1 245912642 246670614 +TFB2M chr1 246703862 246729626 +CNST chr1 246729746 246831886 +SCCPDH chr1 246887349 246931439 +ZNF670 chr1 247108849 247242113 +ZNF669 chr1 247263264 247267674 +ZNF124 chr1 247285277 247335318 +ZNF496 chr1 247460714 247495148 +NLRP3 chr1 247579458 247612410 +OR2G2 chr1 247751662 247752615 +OR2L13 chr1 248100493 248264224 +OR2M3 chr1 248366332 248367392 +OR2T3 chr1 248636627 248637634 +LYPD8 chr1 248902716 248903150 +ZNF672 chr1 249132409 249143716 +ZNF692 chr1 249144205 249153343 +PGBD2 chr1 249200395 249214145 +SH3YL1 chr2 217730 266398 +ACP1 chr2 264140 278283 +TMEM18 chr2 667335 677439 +MYT1L chr2 1792885 2335032 +TSSC1 chr2 3192696 3381653 +TRAPPC12 chr2 3383446 3488865 +ADI1 chr2 3501132 3523507 +RNASEH1 chr2 3592383 3606206 +RPS7 chr2 3622795 3628509 +COLEC11 chr2 3642426 3692048 +SOX11 chr2 5832799 5841516 +RNF144A chr2 7057523 7208417 +ID2 chr2 8818975 8824583 +KIDINS220 chr2 8865408 8977760 +MBOAT2 chr2 8992820 9143942 +ASAP2 chr2 9346894 9545812 +ITGB1BP1 chr2 9543604 9563676 +CPSF3 chr2 9563697 9613230 +IAH1 chr2 9613787 9636672 +ADAM17 chr2 9628615 9695921 +YWHAQ chr2 9724101 9771143 +TAF1B chr2 9983483 10074545 +KLF11 chr2 10182976 10194963 +HPCAL1 chr2 10443015 10567743 +ODC1 chr2 10580094 10588630 +NOL10 chr2 10710892 10830101 +PDIA6 chr2 10923517 10978103 +PQLC3 chr2 11295324 11319000 +ROCK2 chr2 11319887 11488456 +E2F6 chr2 11584501 11606297 +GREB1 chr2 11674242 11782914 +LPIN1 chr2 11817721 11967535 +TRIB2 chr2 12857015 12882860 +NBAS chr2 15307032 15701454 +DDX1 chr2 15731302 15771235 +FAM49A chr2 16730727 16847599 +RAD51AP2 chr2 17691851 17699706 +SMC6 chr2 17845079 17981509 +RDH14 chr2 18735989 18741946 +OSR1 chr2 19551246 19558414 +TTC32 chr2 20096404 20101747 +LAPTM4A chr2 20232411 20251789 +PUM2 chr2 20448452 20551995 +RHOB chr2 20646835 20649200 +HS1BP3 chr2 20760208 20850849 +C2orf43 chr2 
20883788 21022882 +ATAD2B chr2 23971534 24149984 +UBXN2A chr2 24150155 24227779 +C2orf44 chr2 24252210 24272445 +SF3B14 chr2 24290454 24299313 +FAM228B chr2 24299396 24392509 +TP53I3 chr2 24300303 24308731 +ITSN2 chr2 24425733 24583583 +NCOA1 chr2 24714783 24993571 +PTRHD1 chr2 25012603 25016251 +EFR3B chr2 25264999 25378243 +DTNB chr2 25600067 25896503 +KIF3C chr2 26149471 26205618 +RAB10 chr2 26256976 26360323 +GAREML chr2 26395960 26412532 +HADHA chr2 26413504 26467594 +HADHB chr2 26466038 26513336 +EPT1 chr2 26531415 26618759 +KCNK3 chr2 26915619 26956288 +SLC35F6 chr2 26987152 27004099 +DPYSL5 chr2 27070615 27173219 +MAPRE3 chr2 27193480 27250064 +TMEM214 chr2 27255778 27264563 +AGBL5 chr2 27265232 27293490 +OST4 chr2 27293340 27294641 +KHK chr2 27309615 27323640 +CGREF1 chr2 27321757 27341995 +PREB chr2 27353624 27357543 +TCF23 chr2 27371872 27376378 +SLC5A6 chr2 27422455 27435826 +ATRAID chr2 27434895 27440046 +CAD chr2 27440258 27466811 +UCN chr2 27530268 27531313 +MPV17 chr2 27532360 27548547 +GTF3C2 chr2 27548716 27579868 +EIF2B4 chr2 27587219 27593353 +SNX17 chr2 27593389 27599995 +PPM1G chr2 27604061 27632554 +NRBP1 chr2 27650657 27665126 +IFT172 chr2 27667238 27712656 +C2orf16 chr2 27799389 27805588 +ZNF512 chr2 27805897 27858041 +CCDC121 chr2 27848506 27851879 +GPN1 chr2 27851114 27874375 +SUPT7L chr2 27873679 27886676 +SLC4A1AP chr2 27886338 27917840 +MRPL33 chr2 27994584 28210954 +BRE chr2 28112808 28561768 +FOSL2 chr2 28615315 28640179 +PPP1CB chr2 28974506 29025806 +TRMT61B chr2 29072687 29093167 +WDR43 chr2 29117509 29171088 +CLIP4 chr2 29320571 29412509 +YPEL5 chr2 30369807 30383399 +LBH chr2 30454397 30546596 +LCLAT1 chr2 30670092 30867091 +EHD3 chr2 31456880 31492313 +SRD5A2 chr2 31747550 31806136 +MEMO1 chr2 32090129 32236299 +DPY30 chr2 32092878 32264881 +SPAST chr2 32288680 32382706 +SLC30A6 chr2 32390933 32449448 +YIPF4 chr2 32502979 32541663 +BIRC6 chr2 32582096 32843966 +TTC27 chr2 32853099 33046118 +MIR4765 chr2 32860322 32860398 
+FAM98A chr2 33808725 33824449 +FEZ2 chr2 36778570 36873230 +STRN chr2 37070783 37193615 +HEATR5B chr2 37195526 37311485 +GPATCH11 chr2 37311594 37326387 +EIF2AK2 chr2 37326353 37384208 +CEBPZ chr2 37428755 37458856 +NDUFAF7 chr2 37458774 37480546 +RMDN2 chr2 38150330 38294285 +ATL2 chr2 38522022 38604427 +SRSF7 chr2 38970741 38978636 +GEMIN6 chr2 38978676 39012142 +DHX57 chr2 39024871 39103075 +MORN2 chr2 39103103 39156213 +SOS1 chr2 39208537 39351486 +MAP4K3 chr2 39476407 39664453 +TMEM178A chr2 39892122 39945103 +THUMPD2 chr2 39963200 40006407 +SLC8A1 chr2 40324410 40838193 +EML4 chr2 42396490 42559688 +COX7A2L chr2 42560686 42652228 +MTA3 chr2 42721709 42984087 +THADA chr2 43393800 43823185 +ZFP36L2 chr2 43449541 43453748 +PLEKHH2 chr2 43864412 43995126 +DYNC2LI1 chr2 44001178 44037149 +ABCG5 chr2 44039611 44066004 +LRPPRC chr2 44113647 44223144 +PPM1B chr2 44395108 44471523 +PREPL chr2 44543420 44589001 +CAMKMT chr2 44589089 44999731 +SRBD1 chr2 45615819 45839304 +PRKCE chr2 45878484 46415129 +EPAS1 chr2 46520806 46613836 +RHOQ chr2 46768945 46810260 +PIGF chr2 46808076 46844258 +CRIPT chr2 46843555 46852881 +SOCS5 chr2 46926091 46990268 +MCFD2 chr2 47129009 47168994 +C2orf61 chr2 47272677 47382517 +CALM2 chr2 47387221 47403740 +BCYRN1 chr2 47558199 47571656 +MSH2 chr2 47630108 47789450 +MSH6 chr2 47922669 48037240 +FBXO11 chr2 48016455 48132932 +FOXN2 chr2 48541776 48606433 +PPP1R21 chr2 48667737 48742525 +NRXN1 chr2 50145643 51259674 +GPR75-ASB3 chr2 53759810 54087170 +ASB3 chr2 53897430 54087297 +ERLEC1 chr2 54014181 54045956 +GPR75 chr2 54080050 54087126 +PSME4 chr2 54091204 54197977 +ACYP2 chr2 54197975 54532437 +SPTBN1 chr2 54683422 54896812 +RTN4 chr2 55199325 55339757 +CLHC1 chr2 55401927 55459699 +RPS27A chr2 55459039 55462989 +MTIF2 chr2 55463731 55496483 +CCDC88A chr2 55514978 55647057 +CCDC104 chr2 55746740 55773015 +SMEK2 chr2 55774428 55846015 +PNPT1 chr2 55861400 55921045 +MIR216A chr2 56216085 56216194 +FANCL chr2 58386378 58468507 +PAPOLG chr2 
60983365 61029220 +REL chr2 61108656 61158745 +PEX13 chr2 61244360 61279125 +C2orf74 chr2 61372243 61391964 +AHSA2 chr2 61404553 61418338 +USP34 chr2 61414598 61697904 +XPO1 chr2 61704984 61765761 +FAM161A chr2 62051989 62081278 +CCT4 chr2 62095224 62115939 +COMMD1 chr2 62115859 62374382 +B3GNT2 chr2 62423248 62451866 +TMEM17 chr2 62727356 62739029 +EHBP1 chr2 62900986 63273622 +WDPCP chr2 63348518 64054977 +MDH1 chr2 63815743 63834331 +UGP2 chr2 64068074 64118696 +PELI1 chr2 64319786 64371588 +LGALSL chr2 64681103 64688515 +AFTPH chr2 64751465 64820139 +SLC1A4 chr2 65215611 65250999 +CEP68 chr2 65283500 65314138 +RAB1A chr2 65297835 65357240 +ACTR2 chr2 65454887 65498387 +SPRED2 chr2 65537985 65659771 +MIR4778 chr2 66585381 66585460 +MEIS1 chr2 66660584 66801001 +ETAA1 chr2 67624451 67637677 +C1D chr2 68268262 68338080 +WDR92 chr2 68350068 68384692 +PNO1 chr2 68384976 68403370 +PPP3R1 chr2 68405989 68483369 +CNRIP1 chr2 68511303 68547183 +PLEK chr2 68592305 68624585 +FBXO48 chr2 68686551 68694390 +ANTXR1 chr2 69240310 69476459 +GFPT1 chr2 69546905 69614382 +NFU1 chr2 69622882 69664760 +ANXA4 chr2 69871557 70053596 +GMCL1 chr2 70056774 70108528 +SNRNP27 chr2 70120692 70132707 +MXD1 chr2 70124820 70170077 +PCBP1 chr2 70314585 70316332 +C2orf42 chr2 70377012 70475747 +TIA1 chr2 70436576 70475792 +PCYOX1 chr2 70484518 70508323 +SNRPG chr2 70508494 70520903 +FAM136A chr2 70523107 70529222 +TGFA chr2 70674412 70781325 +ADD2 chr2 70834750 70995357 +VAX2 chr2 71127720 71160576 +TEX261 chr2 71213068 71222075 +NAGK chr2 71291474 71306935 +MCEE chr2 71336814 71357369 +MPHOSPH10 chr2 71357444 71377231 +ZNF638 chr2 71503691 71662199 +DYSF chr2 71680852 71913898 +EXOC6B chr2 72403113 73053170 +SPR chr2 73114489 73119287 +SFXN5 chr2 73169165 73302747 +SMYD5 chr2 73441350 73454365 +PRADC1 chr2 73455134 73460366 +CCT7 chr2 73460548 73480149 +ALMS1 chr2 73612886 73837920 +TPRKB chr2 73956231 73964527 +DUSP11 chr2 73989311 74007284 +STAMBP chr2 74056086 74100786 +ACTG2 chr2 74119441 
74146992 +DGUOK chr2 74153953 74186088 +BOLA3 chr2 74362525 74375121 +BOLA3-AS1 chr2 74375136 74377188 +MOB1A chr2 74379655 74406025 +MTHFD2 chr2 74425689 74444692 +DCTN1 chr2 74588281 74619214 +C2orf81 chr2 74641304 74648718 +WDR54 chr2 74648805 74652882 +RTKN chr2 74652963 74669549 +WBP1 chr2 74685456 74688018 +MOGS chr2 74688184 74692537 +MRPL53 chr2 74699085 74700449 +CCDC142 chr2 74699113 74710535 +TTC31 chr2 74710200 74722013 +LBX2-AS1 chr2 74729722 74731805 +PCGF1 chr2 74732170 74735707 +AUP1 chr2 74753772 74757066 +HTRA2 chr2 74756504 74760472 +LOXL3 chr2 74759541 74782817 +DOK1 chr2 74776153 74784681 +SEMA4F chr2 74881355 74909186 +HK2 chr2 75061108 75120486 +POLE4 chr2 75185619 75197255 +MRPL19 chr2 75873909 75917977 +GCFC2 chr2 75879126 75938115 +LRRTM4 chr2 76974845 77820445 +REG1A chr2 79347488 79350545 +CTNNA2 chr2 79412357 80875905 +SUCLG1 chr2 84650647 84687169 +TMSB10 chr2 85132749 85133795 +TGOLN2 chr2 85545147 85555548 +RETSAT chr2 85569211 85581743 +ELMOD3 chr2 85581517 85618875 +CAPG chr2 85621871 85645555 +MAT2A chr2 85766288 85772403 +GGCX chr2 85771846 85788670 +VAMP8 chr2 85788685 85809154 +RNF181 chr2 85822848 85824736 +TMEM150A chr2 85825671 85830319 +USP39 chr2 85829979 85876403 +C2orf68 chr2 85833777 85839189 +SFTPB chr2 85884437 85895864 +ATOH8 chr2 85978467 86015189 +ST3GAL5 chr2 86066267 86116137 +POLR1A chr2 86247339 86333278 +PTCD3 chr2 86333305 86369280 +IMMT chr2 86371055 86422893 +MRPL35 chr2 86426478 86440917 +REEP1 chr2 86441116 86565206 +KDM3A chr2 86667770 86719839 +CHMP3 chr2 86730554 86948245 +RNF103 chr2 86830516 86850989 +RMND5A chr2 86947296 87005164 +RGPD2 chr2 88055474 88285309 +KRCC1 chr2 88326724 88355248 +ANKRD36BP2 chr2 89065324 89106126 +MAL chr2 95691422 95719737 +MRPS5 chr2 95752952 95815179 +ZNF514 chr2 95813075 95831158 +ZNF2 chr2 95831177 95850065 +PROM2 chr2 95940201 95957056 +KCNIP3 chr2 95963052 96051825 +FAHD2A chr2 96068469 96082364 +FAHD2CP chr2 96676299 96689128 +STARD7 chr2 96850597 96874563 +TMEM127 
chr2 96914254 96931732 +CIAO1 chr2 96931870 96939087 +SNRNP200 chr2 96940074 96971297 +ARID5A chr2 97202480 97218375 +KANSL3 chr2 97258907 97308524 +LMAN2L chr2 97371666 97405801 +CNNM3 chr2 97481982 97499648 +ANKRD39 chr2 97502348 97523832 +SEMA4C chr2 97525453 97536494 +FAHD2B chr2 97749320 97760619 +ANKRD36 chr2 97779233 97930258 +ANKRD36B chr2 98108978 98206428 +COX5B chr2 98262503 98264846 +ACTR1B chr2 98272431 98280570 +TMEM131 chr2 98372799 98612388 +COA5 chr2 99215773 99224978 +UNC50 chr2 99225042 99234978 +MGAT4A chr2 99235569 99347589 +LIPT1 chr2 99771418 99779620 +MITD1 chr2 99777890 99797521 +MRPL30 chr2 99797542 99814089 +TXNDC9 chr2 99935445 99957165 +EIF5B chr2 99953816 100017789 +REV1 chr2 100016938 100106497 +AFF3 chr2 100162323 100759201 +CHST10 chr2 101008327 101034118 +PDCL3 chr2 101179152 101193197 +RPL31 chr2 101618177 101640494 +CNOT11 chr2 101869264 101886778 +RNF149 chr2 101887681 101925163 +MAP4K4 chr2 102313312 102511149 +IL1RL1 chr2 102927962 102968497 +MRPS9 chr2 105654441 105716418 +GPR45 chr2 105858200 105859924 +C2orf49 chr2 105953816 105965668 +NCK2 chr2 106361354 106510730 +UXS1 chr2 106709759 106810795 +ST6GAL2 chr2 107418056 107503564 +RGPD4 chr2 108443388 108507297 +GCC2 chr2 109065017 109125871 +LIMS1 chr2 109150857 109303702 +RANBP2 chr2 109335937 109402267 +EDAR chr2 109510927 109605828 +SEPT10 chr2 110300559 110371783 +RGPD5 chr2 110550335 110615272 +MALL chr2 110841447 110874143 +LINC00116 chr2 110969106 111002997 +MIR4435-2 chr2 112078589 112078668 +ANAPC1 chr2 112523848 112642267 +MERTK chr2 112656056 112787138 +ZC3H8 chr2 112969102 113012713 +TTL chr2 113239731 113299316 +POLR1B chr2 113299492 113334635 +CHCHD5 chr2 113341817 113346852 +SLC20A1 chr2 113403434 113421404 +IL1A chr2 113531492 113542167 +IL1B chr2 113587328 113594480 +IL1F10 chr2 113825547 113833427 +CBWD2 chr2 114195268 114253766 +WASH2P chr2 114341664 114356655 +RPL23AP7 chr2 114368079 114384667 +RABL2A chr2 114384806 114400973 +SLC35F5 chr2 114462588 
114514400 +ACTR3 chr2 114647537 114720173 +DDX18 chr2 118572226 118589955 +CCDC93 chr2 118673054 118771709 +INSIG2 chr2 118846028 118868573 +C1QL2 chr2 119913819 119916465 +DBI chr2 120124497 120130126 +EPB41L5 chr2 120770581 120936695 +TMEM185B chr2 120978854 120980984 +RALB chr2 120997640 121052289 +CLASP1 chr2 122095352 122407163 +MKI67IP chr2 122484521 122494499 +TSN chr2 122494679 122525429 +CNTNAP5 chr2 124782864 125672864 +GYPC chr2 127413509 127454246 +BIN1 chr2 127805603 127864931 +ERCC3 chr2 128014866 128051752 +MAP3K2 chr2 128056306 128146041 +IWS1 chr2 128193783 128284462 +LIMS2 chr2 128395956 128439360 +GPR17 chr2 128403439 128410213 +WDR33 chr2 128458596 128568761 +POLR2D chr2 128603840 128615731 +AMMECR1L chr2 128619204 128643496 +SAP130 chr2 128698791 128785694 +UGGT1 chr2 128848774 128953251 +HS6ST1 chr2 128994290 129076151 +CCDC74B chr2 130896860 130902707 +SMPD4 chr2 130908981 130940323 +MZT2B chr2 130939310 130948302 +CCDC115 chr2 131095814 131099922 +IMP4 chr2 131099798 131105383 +PTPN18 chr2 131113580 131132982 +GPR148 chr2 131486643 131487909 +ARHGEF4 chr2 131594489 131804836 +FAM168B chr2 131805449 131851033 +PLEKHB2 chr2 131862420 132111282 +MZT2A chr2 132222473 132250316 +C2orf27A chr2 132479948 132524973 +NCKAP5 chr2 133429374 134326034 +TMEM163 chr2 135213330 135476570 +ACMSD chr2 135596117 135659604 +CCNT2 chr2 135675805 135716912 +RAB3GAP1 chr2 135809835 135933964 +R3HDM1 chr2 136289025 136482840 +UBXN4 chr2 136499189 136542625 +MCM6 chr2 136597196 136633996 +DARS chr2 136664247 136743670 +CXCR4 chr2 136871919 136875735 +HNMT chr2 138721590 138773930 +LRP1B chr2 140988992 142889270 +GTDC1 chr2 144695635 145090135 +ZEB2 chr2 145141648 145282147 +ACVR2A chr2 148602086 148688393 +ORC4 chr2 148687968 148779147 +MBD5 chr2 148778580 149275805 +EPC2 chr2 149402009 149545130 +KIF5C chr2 149632819 149883273 +MMADHC chr2 150426148 150444330 +RND3 chr2 151324709 151395525 +NMI chr2 152126979 152146571 +RIF1 chr2 152266397 152364527 +ARL5A chr2 
152645498 152685006 +CACNB4 chr2 152689290 152955593 +STAM2 chr2 152973315 153032506 +FMNL2 chr2 153191751 153506348 +PRPF40A chr2 153508107 153574511 +ARL6IP6 chr2 153574407 153617767 +GALNT13 chr2 154728426 155310361 +NR4A2 chr2 157180944 157198860 +GPD2 chr2 157291802 157470247 +ERMN chr2 158175137 158184225 +FAM133DP chr2 158236236 158236926 +PKP4 chr2 159313476 159539391 +TANC1 chr2 159825146 160089170 +WDSUB1 chr2 160092304 160143310 +BAZ2B chr2 160175490 160473203 +MARCH7 chr2 160569000 160627538 +MIR4785 chr2 161264321 161264393 +TANK chr2 161993419 162092732 +PSMD14 chr2 162164549 162268228 +GCA chr2 163175350 163228105 +SCN2A chr2 166095912 166248818 +CSRNP3 chr2 166326157 166545917 +GALNT3 chr2 166604101 166651192 +TTC21B chr2 166713985 166810353 +SCN1A chr2 166845670 166984523 +CERS6-AS1 chr2 169628463 169643471 +SPC25 chr2 169690642 169769881 +DHRS9 chr2 169921299 169952677 +BBS5 chr2 170335688 170382432 +FASTKD1 chr2 170386259 170430385 +PPIG chr2 170440850 170497916 +PHOSPHO2 chr2 170550975 170558218 +KLHL23 chr2 170550998 170633499 +SSB chr2 170648443 170668574 +METTL5 chr2 170666591 170681441 +UBR3 chr2 170683968 170940641 +GAD1 chr2 171669723 171717661 +GORASP2 chr2 171784974 171823639 +TLK1 chr2 171847333 172087824 +DCAF17 chr2 172290727 172341562 +DYNC1I2 chr2 172543919 172604930 +SLC25A12 chr2 172640880 172864766 +HAT1 chr2 172778958 172848599 +METAP1D chr2 172864490 172947158 +DLX2 chr2 172964167 172967628 +PDK1 chr2 173420101 173489823 +RAPGEF4 chr2 173600002 173917621 +SP3 chr2 174771187 174830430 +OLA1 chr2 174937175 175113426 +CIR1 chr2 175212750 175260443 +SCRN3 chr2 175260458 175294303 +GPR155 chr2 175296966 175351822 +WIPF1 chr2 175424300 175547644 +H3F3AP4 chr2 175584505 175585526 +CHN1 chr2 175664091 175870097 +ATF2 chr2 175936978 176033110 +ATP5G3 chr2 176040986 176049335 +KIAA1715 chr2 176788620 176867567 +MIR10B chr2 177015031 177015140 +MTX2 chr2 177134123 177202753 +HNRNPA3 chr2 178077291 178088686 +NFE2L2 chr2 178092323 
178257425 +AGPS chr2 178257372 178408564 +TTC30B chr2 178413726 178417742 +RBM45 chr2 178977151 179003738 +OSBPL6 chr2 179059208 179264160 +PRKRA chr2 179296141 179316239 +TTN chr2 179390716 179695529 +SESTD1 chr2 179966419 180129517 +CWC22 chr2 180809603 180871840 +UBE2E3 chr2 181831975 181941312 +CERKL chr2 182401403 182545392 +NEUROD1 chr2 182537815 182545603 +SSFA2 chr2 182756560 182795465 +DNAJC10 chr2 183580999 183659191 +NCKAP1 chr2 183773843 183903586 +NUP35 chr2 183982241 184026408 +FSIP2 chr2 186603355 186698017 +ZC3H15 chr2 187350883 187374090 +ITGAV chr2 187454792 187545628 +FAM171B chr2 187558698 187630685 +COL3A1 chr2 189839046 189877472 +WDR75 chr2 190306159 190340291 +SLC40A1 chr2 190425305 190448484 +ASNSD1 chr2 190526111 190535557 +OSGEPL1 chr2 190611386 190627953 +ORMDL1 chr2 190635049 190649097 +PMS1 chr2 190649107 190742355 +HIBCH chr2 191054461 191208919 +INPP1 chr2 191208196 191236391 +MFSD6 chr2 191273081 191373931 +NAB1 chr2 191511472 191557492 +GLS chr2 191745553 191830278 +STAT1 chr2 191829084 191885686 +STAT4 chr2 191894302 192016322 +NABP1 chr2 192542794 192561385 +TMEFF2 chr2 192813769 193060435 +SLC39A10 chr2 196440701 196602426 +STK17B chr2 196998290 197041227 +CCDC150 chr2 197504278 197628214 +GTF3C3 chr2 197627756 197664449 +PGAP1 chr2 197697728 197792520 +ANKRD44 chr2 197831741 198175897 +SF3B1 chr2 198254508 198299815 +COQ10B chr2 198318147 198340032 +HSPD1 chr2 198351305 198381461 +HSPE1 chr2 198364718 198368181 +HSPE1-MOB4 chr2 198365137 198415450 +MOB4 chr2 198380295 198418423 +RFTN2 chr2 198432948 198540769 +MARS2 chr2 198570087 198573113 +C2orf69 chr2 200775979 200820658 +TYW5 chr2 200794698 200820459 +C2orf47 chr2 200820040 200873263 +KCTD18 chr2 201353675 201384507 +BZW1 chr2 201675317 201688569 +CLK1 chr2 201717732 201729422 +PPIL3 chr2 201735630 201754026 +NIF3L1 chr2 201754050 201768655 +ORC2 chr2 201773696 201828403 +FAM126B chr2 201838441 201936394 +NDUFB3 chr2 201936156 201950473 +CFLAR chr2 201980827 202041410 
+TRAK2 chr2 202241930 202316302 +STRADB chr2 202252581 202345569 +ALS2CR11 chr2 202352148 202483901 +TMEM237 chr2 202484907 202508293 +ALS2 chr2 202565277 202645912 +SUMO1 chr2 203070903 203103331 +NOP58 chr2 203130439 203168389 +BMPR2 chr2 203241659 203432474 +FAM117B chr2 203499901 203634480 +ICA1L chr2 203640690 203736708 +WDR12 chr2 203739505 203879521 +CYP20A1 chr2 204103663 204163009 +ABI2 chr2 204192942 204312446 +RAPH1 chr2 204259068 204400133 +ICOS chr2 204801471 204826300 +INO80D chr2 206858445 206951027 +NDUFS1 chr2 206979541 207024327 +GCSHP3 chr2 206980834 206981224 +EEF1B2 chr2 207024309 207027652 +DYTN chr2 207516345 207583120 +FASTKD2 chr2 207630081 207657233 +KLF7 chr2 207938861 208031991 +CREB1 chr2 208394461 208468155 +METTL21A chr2 208445355 208490652 +MIR4775 chr2 208619531 208619605 +PLEKHM3 chr2 208693027 208890284 +IDH1 chr2 209100951 209130798 +PIKFYVE chr2 209130991 209223475 +MAP2 chr2 210288782 210598842 +UNC80 chr2 210636717 210864024 +RPE chr2 210867289 210886300 +LANCL1 chr2 211295973 211342376 +ERBB4 chr2 212240446 213403565 +SPAG16 chr2 214149113 215275225 +BARD1 chr2 215590370 215674428 +ATIC chr2 216176540 216214487 +FN1 chr2 216225163 216300895 +PECR chr2 216861052 216947678 +TMEM169 chr2 216946589 216967506 +XRCC5 chr2 216972187 217071026 +SMARCAL1 chr2 217277137 217347776 +RPL37A chr2 217362912 217443903 +IGFBP5 chr2 217536828 217560248 +TNS1 chr2 218664512 218867718 +ARPC2 chr2 219081817 219119079 +AAMP chr2 219128850 219134980 +PNKD chr2 219135115 219211516 +TMBIM1 chr2 219138915 219157309 +SLC11A1 chr2 219246752 219261617 +CTDSP1 chr2 219262979 219270664 +USP37 chr2 219314974 219433084 +RQCD1 chr2 219433303 219461803 +PLCD4 chr2 219472488 219501907 +ZNF142 chr2 219502639 219524378 +BCS1L chr2 219523487 219528166 +RNF25 chr2 219528587 219537134 +STK36 chr2 219536749 219567439 +TTLL4 chr2 219575568 219620139 +CYP27A1 chr2 219646472 219680016 +CDK5R2 chr2 219824377 219826876 +NHEJ1 chr2 219940039 220025587 +CNPPD1 chr2 
220036619 220042828 +FAM134A chr2 220040947 220050201 +ZFAND2B chr2 220059959 220074373 +ABCB6 chr2 220074490 220083712 +ATG9A chr2 220074494 220094439 +ANKZF1 chr2 220094479 220101391 +DNAJB2 chr2 220143989 220151622 +PTPRN chr2 220154345 220174370 +DNPEP chr2 220238268 220264744 +GMPPA chr2 220363589 220371710 +ASIC4 chr2 220378892 220403494 +CHPF chr2 220403669 220408509 +TMEM198 chr2 220408385 220415317 +OBSL1 chr2 220415451 220436581 +STK11IP chr2 220462582 220481173 +SGPP2 chr2 223289236 223425667 +FARSB chr2 223435255 223521056 +ACSL3 chr2 223725652 223809357 +SCG2 chr2 224461658 224467221 +WDFY1 chr2 224720433 224810104 +MRPL44 chr2 224822121 224832431 +SERPINE2 chr2 224839829 224904036 +FAM124B chr2 225243415 225266802 +CUL3 chr2 225334867 225450110 +DOCK10 chr2 225629807 225907162 +RHBDD1 chr2 227700297 227863931 +MFF chr2 228189867 228222550 +SPHKAP chr2 228844666 229046361 +PID1 chr2 229715242 230136001 +DNER chr2 230222345 230579274 +TRIP12 chr2 230628554 230787955 +FBXO36 chr2 230787018 230877825 +SP110 chr2 231032009 231090444 +SP100 chr2 231280657 231408805 +CAB39 chr2 231577560 231685790 +ITM2C chr2 231729354 231743963 +GPR55 chr2 231772033 231825781 +PSMD1 chr2 231921578 232037541 +ARMC9 chr2 232063260 232239548 +NCL chr2 232318242 232348352 +SNORD82 chr2 232325082 232325151 +PTMA chr2 232571605 232578251 +PDE6D chr2 232597135 232650982 +COPS7B chr2 232646381 232673963 +DIS3L2 chr2 232825955 233209060 +ALPP chr2 233243244 233247599 +TIGD1 chr2 233412779 233415226 +EIF4E2 chr2 233414762 233448354 +MIR5001 chr2 233415184 233415283 +EFHD1 chr2 233470767 233547491 +GIGYF2 chr2 233562009 233725285 +INPP5D chr2 233924677 234116549 +ATG16L1 chr2 234118697 234204320 +SAG chr2 234216462 234255701 +DGKD chr2 234263153 234380750 +USP40 chr2 234384166 234475428 +UGT1A3 chr2 234637754 234681945 +SH3BP4 chr2 235860617 235964358 +AGAP1 chr2 236402733 237040444 +GBX2 chr2 237073879 237077012 +COPS8 chr2 237993955 238009109 +LRRFIP1 chr2 238536219 238722325 +RAMP1 
chr2 238767536 238820756 +UBE2F chr2 238875469 238951236 +SCLY chr2 238969530 239008054 +ILKAP chr2 239079042 239112370 +HES6 chr2 239146908 239149303 +PER2 chr2 239152679 239198743 +ASB1 chr2 239335383 239360891 +HDAC4 chr2 239969864 240323348 +NDUFA10 chr2 240831867 240964819 +PRR21 chr2 240981230 240982399 +MYEOV2 chr2 241065980 241076224 +GPC1 chr2 241375088 241407493 +ANKMY1 chr2 241418839 241508626 +DUSP28 chr2 241499471 241503431 +RNPEPL1 chr2 241505221 241520789 +GPR35 chr2 241544848 241570676 +KIF1A chr2 241653181 241759725 +MTERFD2 chr2 242011584 242041747 +PPP1R7 chr2 242088991 242123067 +ANO7 chr2 242127924 242164792 +HDLBP chr2 242166679 242256476 +SEPT2 chr2 242254515 242293442 +FARP2 chr2 242295658 242434256 +STK25 chr2 242432089 242449145 +THAP4 chr2 242523820 242576864 +ATG4B chr2 242576628 242613272 +DTYMK chr2 242615157 242626406 +ING5 chr2 242641450 242668893 +D2HGDH chr2 242673994 242708231 +NEU4 chr2 242749920 242758739 +IL5RA chr3 3111233 3168297 +TRNT1 chr3 3168600 3192563 +CRBN chr3 3190676 3221394 +SUMF1 chr3 3742498 4508965 +LRRN1 chr3 3841121 3889387 +SETMAR chr3 4344988 4359251 +BHLHE40 chr3 5020801 5027008 +ARL8B chr3 5163905 5222596 +RAD18 chr3 8817088 9005457 +SRGAP3 chr3 9022275 9404737 +SETD5-AS1 chr3 9391373 9440263 +THUMPD3 chr3 9404526 9428475 +SETD5 chr3 9439299 9520924 +LHFPL4 chr3 9543481 9595486 +MTMR14 chr3 9691117 9744077 +OGG1 chr3 9791628 9829903 +CAMK1 chr3 9799026 9811676 +TADA3 chr3 9821544 9834695 +ARPC4 chr3 9834179 9849410 +TTLL3 chr3 9849770 9896822 +RPUSD3 chr3 9879533 9886286 +JAGN1 chr3 9932238 9936033 +CRELD1 chr3 9975506 9987097 +EMC3 chr3 10004221 10052800 +CIDECP chr3 10055932 10068049 +BRK1 chr3 10157276 10168874 +VHL chr3 10182692 10193904 +IRAK2 chr3 10206549 10285427 +TATDN2 chr3 10289707 10322902 +SEC13 chr3 10334815 10362862 +SLC6A1 chr3 11034410 11080933 +SLC6A1-AS1 chr3 11047784 11060910 +ATG7 chr3 11313995 11599139 +VGLL4 chr3 11597544 11762220 +TAMM41 chr3 11831916 11888393 +TIMP4 chr3 12194551 
12200851 +TSEN2 chr3 12525931 12581122 +MKRN2 chr3 12598513 12625212 +RAF1 chr3 12625100 12705725 +CAND2 chr3 12837971 12913415 +RPL32 chr3 12875984 12883087 +IQSEC1 chr3 12938719 13114617 +HDAC11 chr3 13521224 13547916 +WNT7A chr3 13857755 13921618 +CHCHD4 chr3 14153580 14166370 +TMEM43 chr3 14166440 14185179 +XPC chr3 14186647 14220283 +LSM3 chr3 14219858 14242619 +CCDC174 chr3 14693271 14714166 +C3orf20 chr3 14716606 14814541 +FGD5-AS1 chr3 14961854 14989931 +NR2C2 chr3 14989091 15095107 +MRPS25 chr3 15083967 15106842 +ZFYVE20 chr3 15111580 15140670 +CAPN7 chr3 15247659 15294425 +SH3BP5 chr3 15296360 15382875 +METTL6 chr3 15422782 15482073 +EAF1 chr3 15468862 15484120 +HACL1 chr3 15602211 15643338 +BTD chr3 15642848 15687329 +ANKRD28 chr3 15708743 15901278 +GALNT15 chr3 16216156 16273499 +DPH3 chr3 16299485 16306479 +OXNAD1 chr3 16306706 16391806 +PLCL2 chr3 16844159 17132086 +TBC1D5 chr3 17198654 18486309 +SATB1 chr3 18386879 18487080 +KCNH8 chr3 19189946 19577138 +EFHB chr3 19920964 19988517 +RAB5A chr3 19988571 20026667 +KAT2B chr3 20081515 20195896 +UBE2E2 chr3 23244511 23633284 +UBE2E1 chr3 23847394 23932807 +NKIRAS1 chr3 23933151 23988082 +RPL15 chr3 23958036 23965183 +NR1D2 chr3 23986751 24022109 +TOP2B chr3 25639475 25706398 +NGLY1 chr3 25760435 25831530 +OXSM chr3 25824408 25836025 +SLC4A7 chr3 27414214 27525911 +CMC1 chr3 28283075 28366633 +AZI2 chr3 28356494 28390618 +LINC00693 chr3 28616282 28799831 +STT3B chr3 31574130 31679112 +GPD1L chr3 32147181 32210205 +CMTM7 chr3 32433163 32524559 +CMTM6 chr3 32522804 32544900 +DYNC1LI1 chr3 32567463 32612366 +CNOT10 chr3 32726637 32815367 +GLB1 chr3 33038100 33138722 +TMPPE chr3 33131913 33138293 +CRTAP chr3 33155471 33189265 +SUSD5 chr3 33191537 33260707 +FBXL2 chr3 33318517 33445154 +UBP1 chr3 33429828 33482863 +CLASP2 chr3 33537737 33759848 +PDCD6IP chr3 33839844 33911194 +ARPP21 chr3 35680437 35835988 +EPM2AIP1 chr3 37027357 37034795 +MLH1 chr3 37034823 37107380 +LRRFIP2 chr3 37094117 37225180 +GOLGA4 
chr3 37284668 37408242 +CTDSPL chr3 37903451 38025960 +PLCD1 chr3 38048987 38071253 +ACAA1 chr3 38144620 38178733 +MYD88 chr3 38179969 38184513 +OXSR1 chr3 38206580 38296979 +EXOG chr3 38537618 38583652 +SCN11A chr3 38887260 38992052 +WDR48 chr3 39093489 39138155 +GORASP1 chr3 39138150 39149854 +CSRNP1 chr3 39183346 39196053 +CX3CR1 chr3 39304985 39323226 +SLC25A38 chr3 39424839 39438842 +RPSA chr3 39448180 39454033 +SNORA6 chr3 39449880 39450030 +MOBP chr3 39508689 39570970 +EIF1B chr3 40351175 40353915 +RPL14 chr3 40498783 40503861 +ZNF619 chr3 40518604 40531727 +ZNF620 chr3 40547483 40560227 +ZNF621 chr3 40566369 40616176 +CTNNB1 chr3 41236328 41301587 +TRAK1 chr3 42055294 42267381 +SEC22C chr3 42589461 42642572 +SS18L2 chr3 42623332 42636606 +NKTR chr3 42642106 42690227 +ZBTB47 chr3 42695176 42709072 +KLHL40 chr3 42727011 42734036 +HHATL chr3 42734155 42744319 +HIGD1A chr3 42798669 42846023 +SNRK chr3 43328004 43466256 +SNRK-AS1 chr3 43391136 43393454 +ANO10 chr3 43396351 43733086 +ABHD5 chr3 43731605 43775863 +TCAIM chr3 44379611 44450943 +ZNF445 chr3 44481262 44519162 +ZKSCAN7 chr3 44596685 44624975 +ZNF660 chr3 44619715 44641186 +ZNF197 chr3 44626380 44689963 +ZNF35 chr3 44690219 44702283 +ZNF502 chr3 44754135 44765323 +ZNF501 chr3 44771088 44778575 +KIAA1143 chr3 44779153 44803154 +TMEM42 chr3 44903361 44907162 +EXOSC7 chr3 45016733 45077558 +CLEC3B chr3 45043040 45077563 +LARS2 chr3 45429998 45590913 +LIMD1 chr3 45596886 45727830 +SACM1L chr3 45730548 45786916 +LZTFL1 chr3 45864808 45957534 +CCR9 chr3 45927996 45944667 +CCR1 chr3 46243200 46249887 +RTP3 chr3 46538981 46542439 +PTH1R chr3 46919236 46945287 +CCDC12 chr3 46963216 47023500 +SETD2 chr3 47057919 47205457 +KLHL18 chr3 47324407 47388306 +PTPN23 chr3 47422501 47454931 +SCAP chr3 47455203 47518616 +ELP6 chr3 47537130 47555251 +CSPG5 chr3 47603729 47622282 +SMARCC1 chr3 47626762 47823596 +DHX30 chr3 47844399 47891685 +MAP4 chr3 47892182 48130769 +ZNF589 chr3 48282590 48340743 +NME6 chr3 48334754 
48343175 +PLXNB1 chr3 48445261 48471594 +CCDC51 chr3 48473574 48481866 +TMA7 chr3 48481667 48485616 +TREX1 chr3 48506445 48509044 +SHISA5 chr3 48509197 48542259 +PFKFB4 chr3 48555117 48599448 +UCN2 chr3 48599160 48601206 +UQCRC1 chr3 48636435 48648409 +SLC26A6 chr3 48663156 48672926 +CELSR3 chr3 48673902 48700348 +NCKIPSD chr3 48701364 48723797 +IP6K2 chr3 48725436 48777786 +PRKAR2A chr3 48782030 48885279 +SLC25A20 chr3 48894369 48936426 +ARIH2 chr3 48956254 49023815 +P4HTM chr3 49027319 49044587 +WDR6 chr3 49044495 49053386 +DALRD3 chr3 49052921 49059726 +NDUFAF3 chr3 49057892 49060928 +IMPDH2 chr3 49061758 49066841 +QRICH1 chr3 49067140 49131796 +QARS chr3 49133365 49142553 +USP19 chr3 49145479 49158371 +KLHDC8B chr3 49209044 49213917 +C3orf62 chr3 49306035 49315342 +USP4 chr3 49315264 49378145 +GPX1 chr3 49394609 49396033 +RHOA chr3 49396578 49450431 +TCTA chr3 49449639 49453908 +AMT chr3 49454211 49460186 +NICN1 chr3 49460379 49466759 +DAG1 chr3 49506146 49573048 +APEH chr3 49711435 49721396 +MST1 chr3 49721380 49726934 +RNF123 chr3 49726932 49758962 +GMPPB chr3 49754277 49761384 +IP6K1 chr3 49761727 49823975 +FAM212A chr3 49840687 49842463 +UBA7 chr3 49842640 49851379 +CAMKV chr3 49895421 49907655 +MON1A chr3 49946302 49967606 +RBM6 chr3 49977440 50137478 +RBM5 chr3 50126341 50156454 +SLC38A3 chr3 50242679 50258411 +GNAI2 chr3 50263724 50296787 +SEMA3B chr3 50304990 50314977 +IFRD2 chr3 50325163 50330349 +NAT6 chr3 50333833 50336852 +HYAL2 chr3 50355221 50360337 +TUSC2 chr3 50357458 50365682 +RASSF1 chr3 50367219 50378411 +NPRL2 chr3 50384761 50388522 +CYB561D2 chr3 50388126 50395891 +TMEM115 chr3 50392180 50397041 +C3orf18 chr3 50595462 50608458 +HEMK1 chr3 50606583 50622366 +MAPKAPK3 chr3 50648951 50686720 +DOCK3 chr3 50712672 51421629 +MANF chr3 51422478 51426828 +VPRBP chr3 51433298 51534010 +RAD54L2 chr3 51575160 51702683 +TEX264 chr3 51696709 51738339 +RRP9 chr3 51967446 51975957 +PARP3 chr3 51976361 51982883 +GPR62 chr3 51989330 51991509 +PCBP4 chr3 
51991470 52002426 +ABHD14B chr3 52002526 52017425 +ABHD14A chr3 52005442 52015212 +ACY1 chr3 52009066 52023213 +RPL29 chr3 52027616 52029958 +DUSP7 chr3 52082935 52090566 +POC1A chr3 52109269 52188706 +ALAS1 chr3 52232102 52248343 +TWF2 chr3 52262626 52273276 +WDR82 chr3 52288437 52322036 +GLYCTK chr3 52321105 52329272 +BAP1 chr3 52435029 52444366 +NISCH chr3 52489134 52527087 +STAB1 chr3 52529354 52558511 +NT5DC2 chr3 52558386 52569070 +PBRM1 chr3 52579368 52719933 +GNL3 chr3 52715172 52728508 +SNORD19B chr3 52722898 52722977 +GLT8D1 chr3 52728505 52740048 +SPCS1 chr3 52738971 52742182 +NEK4 chr3 52744800 52804965 +TMEM110-MUSTN1 chr3 52867137 52931578 +RFT1 chr3 53122499 53164478 +TKT chr3 53258723 53290068 +DCP1A chr3 53317447 53381654 +CHDH chr3 53846362 53880417 +IL17RB chr3 53880607 53899827 +ACTR8 chr3 53901093 53916229 +SELK chr3 53918437 53926015 +ERC2 chr3 55542336 56502391 +CCDC66 chr3 56591189 56655846 +FAM208A chr3 56654161 56717265 +IL17RD chr3 57124010 57204334 +APPL1 chr3 57261765 57307496 +PDE12 chr3 57542003 57552571 +ARF4 chr3 57557090 57583947 +SLMAP chr3 57741177 57914895 +ABHD6 chr3 58223233 58281420 +RPP14 chr3 58303341 58303847 +PXK chr3 58318607 58411748 +PDHB chr3 58413357 58419584 +KCTD6 chr3 58477841 58488087 +ACOX2 chr3 58490863 58523046 +FHIT chr3 59735036 61237133 +PTPRG chr3 61547243 62283288 +C3orf14 chr3 62304648 62321888 +LINC00698 chr3 62936105 63110738 +THOC7 chr3 63819546 63849579 +PSMD6 chr3 63996225 64009658 +PRICKLE2 chr3 64079543 64431152 +PRICKLE2-AS1 chr3 64084949 64086798 +PRICKLE2-AS3 chr3 64173220 64186641 +MAGI1 chr3 65339200 66024509 +SLC25A26 chr3 66119285 66438540 +LRIG1 chr3 66429221 66551687 +KBTBD8 chr3 67048727 67061634 +SUCLG2 chr3 67410884 67705038 +TMF1 chr3 69068978 69101484 +UBA3 chr3 69103881 69129559 +ARL6IP5 chr3 69134095 69155217 +MITF chr3 69788586 70017488 +FOXP1 chr3 71003844 71633140 +SHQ1 chr3 72798428 72911065 +PPP4R2 chr3 73045936 73118350 +EBLN2 chr3 73110810 73112488 +FAM86DP chr3 75470703 
75484261 +ZNF717 chr3 75758794 75834734 +ROBO1 chr3 78646390 79816965 +GBE1 chr3 81538850 81811312 +CADM2 chr3 85008132 86123579 +CHMP2B chr3 87276421 87304698 +CGGBP1 chr3 88101094 88199035 +ZNF654 chr3 88188254 88193815 +C3orf38 chr3 88198893 88217879 +PROS1 chr3 93591881 93692910 +ARL13B chr3 93698983 93774512 +DHFRL1 chr3 93766680 93782233 +NSUN3 chr3 93781760 93847389 +CRYBG3 chr3 97595819 97663810 +OR5K3 chr3 98109510 98110475 +CLDND1 chr3 98216756 98241910 +ST3GAL6 chr3 98451080 98540045 +CMSS1 chr3 99536678 99897447 +TBC1D23 chr3 99979844 100044095 +NIT2 chr3 100053545 100075710 +TOMM70A chr3 100082275 100120242 +LNP1 chr3 100120037 100175163 +TMEM45A chr3 100211463 100296288 +GPR128 chr3 100328433 100414323 +TFG chr3 100428205 100467810 +SENP7 chr3 101043049 101232085 +TRMT10C chr3 101280706 101285290 +PCNP chr3 101292939 101313281 +ZBTB11 chr3 101367733 101396339 +RPL24 chr3 101399935 101405626 +PDCL3P4 chr3 101431316 101432035 +CEP97 chr3 101442769 101489406 +NFKBIZ chr3 101546835 101579866 +ALCAM chr3 105085753 105295744 +CBLB chr3 105374305 105588396 +BBX chr3 107241783 107530171 +LINC00636 chr3 107602095 107647754 +CD47 chr3 107762145 107809872 +IFT57 chr3 107879659 107941417 +DZIP3 chr3 108308529 108413693 +MORC1 chr3 108677086 108836989 +PVRL3 chr3 110788918 110994410 +ABHD10 chr3 111697857 111712210 +CD200 chr3 112051194 112081659 +ATG3 chr3 112251356 112280893 +SLC35A5 chr3 112280556 112304424 +GTPBP8 chr3 112709765 112733907 +C3orf17 chr3 112721287 112738708 +SPICE1 chr3 113161565 113234034 +SIDT1 chr3 113251143 113348425 +KIAA2018 chr3 113367232 113415493 +NAA50 chr3 113435307 113465147 +ATP6V1A chr3 113465866 113530903 +QTRTD1 chr3 113724680 113807269 +ZNF80 chr3 113953483 113956425 +MIR568 chr3 114035322 114035416 +ZBTB20 chr3 114056941 114866118 +GAP43 chr3 115342171 115440337 +LSAMP chr3 115521235 117716095 +IGSF11 chr3 118619404 118864915 +C3orf30 chr3 118864997 118878889 +B4GALT4 chr3 118930579 118959950 +ARHGAP31 chr3 119013220 119139561 
+TMEM39A chr3 119148347 119187677 +POGLUT1 chr3 119187785 119213555 +TIMMDC1 chr3 119217379 119243937 +POPDC2 chr3 119355304 119384171 +COX17 chr3 119373360 119396301 +GSK3B chr3 119540170 119813264 +LRRC58 chr3 120043356 120068186 +NDUFB4 chr3 120315156 120321347 +RABL3 chr3 120405528 120461840 +GTF2E1 chr3 120461484 120501916 +HCLS1 chr3 121350246 121379774 +GOLGB1 chr3 121382046 121468602 +IQCB1 chr3 121488610 121553926 +SLC15A2 chr3 121612936 121662949 +CD86 chr3 121774213 121839983 +CCDC58 chr3 122078438 122102078 +FAM162A chr3 122103023 122131181 +WDR5B chr3 122130715 122134882 +KPNA1 chr3 122140796 122233792 +PARP9 chr3 122246771 122283424 +DTX3L chr3 122283085 122294050 +HSPBAP1 chr3 122458846 122512671 +SEMA5B chr3 122628041 122747452 +SEC22A chr3 122920774 122992977 +PTPLB chr3 123209667 123304032 +MYLK-AS1 chr3 123304389 123363415 +MYLK chr3 123328896 123603178 +CCDC14 chr3 123616152 123680564 +KALRN chr3 123798870 124445172 +UMPS chr3 124449213 124464040 +SLC12A8 chr3 124801480 124998021 +ZNF148 chr3 124944405 125094198 +SNX4 chr3 125165495 125239041 +OSBPL11 chr3 125247702 125313934 +SLC41A3 chr3 125725198 125820404 +ALDH1L1-AS1 chr3 125822483 125826912 +ZXDC chr3 126156444 126194762 +NUP210P1 chr3 126379452 126390782 +CHCHD6 chr3 126423063 126679249 +PLXNA1 chr3 126707437 126756235 +TPRA1 chr3 127291912 127317094 +MCM2 chr3 127317066 127341276 +PODXL2 chr3 127348024 127391652 +MGLL chr3 127407909 127542051 +SEC61A1 chr3 127770484 127790526 +RUVBL1 chr3 127783621 127872757 +EEFSEC chr3 127872297 128127485 +RPN1 chr3 128338817 128399918 +RAB7A chr3 128444965 128533639 +ACAD9 chr3 128598439 128634910 +EFCC1 chr3 128720472 128759585 +ISY1 chr3 128846258 128880136 +CNBP chr3 128888327 128902765 +COPG1 chr3 128968449 128996614 +H1FX chr3 129033615 129035120 +RPL32P3 chr3 129101765 129118498 +SNORA7B chr3 129116053 129116191 +MBD4 chr3 129149787 129158878 +TMCC1 chr3 129366635 129612419 +FAM86HP chr3 129817935 129830315 +PIK3R4 chr3 130397779 130465673 
+ATP2C1 chr3 130569439 130735556 +ASTE1 chr3 130732719 130746493 +NUDT16 chr3 131100515 131107674 +MRPL3 chr3 131181056 131221827 +CPNE4 chr3 131252399 132004254 +ACAD11 chr3 132276982 132379567 +UBA5 chr3 132373290 132396941 +TMEM108 chr3 132757235 133116636 +CDV3 chr3 133292574 133309105 +TOPBP1 chr3 133317019 133380737 +TF chr3 133464800 133497850 +SRPRB chr3 133502877 133544616 +RAB6B chr3 133543083 133614680 +AMOTL2 chr3 134074716 134094321 +ANAPC13 chr3 134196548 134205558 +CEP63 chr3 134204585 134293859 +EPHB1 chr3 134316643 134979309 +MSL2 chr3 135867764 135916083 +PCCB chr3 135969148 136056738 +STAG1 chr3 136055077 136471220 +NCK1 chr3 136581050 136668665 +IL20RB chr3 136665072 136729927 +DBR1 chr3 137879854 137893791 +ARMC8 chr3 137906109 138017231 +MRAS chr3 138066539 138124375 +CEP70 chr3 138213186 138313380 +FAIM chr3 138327448 138352218 +PIK3CB chr3 138372860 138553780 +MRPS22 chr3 138724648 139076065 +COPB2 chr3 139074442 139108574 +RBP1 chr3 139236276 139258671 +SLC25A36 chr3 140660672 140698775 +ACPL2 chr3 140947568 141013748 +ZBTB38 chr3 141043055 141168634 +RASA2 chr3 141205889 141334184 +RNF7 chr3 141457046 141466402 +ATP1B3 chr3 141594966 141645356 +TFDP2 chr3 141663277 141868386 +GK5 chr3 141882414 141944449 +XRN1 chr3 142025449 142166904 +PCOLCE2 chr3 142534764 142608045 +U2SURP chr3 142683339 142779567 +SLC9A9 chr3 142984064 143567373 +C3orf58 chr3 143690640 143767561 +PLSCR1 chr3 146232967 146262651 +GYG1 chr3 148709128 148745419 +HLTF chr3 148747914 148804341 +HPS3 chr3 148847371 148891519 +COMMD2 chr3 149456257 149470286 +RNF13 chr3 149530495 149679926 +PFN2 chr3 149682691 149768575 +TSC22D2 chr3 150126122 150184218 +SERP1 chr3 150259781 150321015 +EIF2A chr3 150264465 150302029 +SELT chr3 150320662 150348222 +GPR171 chr3 150915619 150920988 +P2RY13 chr3 151044100 151047336 +P2RY12 chr3 151055168 151102600 +MBNL1 chr3 151961617 152183569 +P2RY1 chr3 152552736 152555841 +RAP2B chr3 152880029 152886265 +ARHGEF26-AS1 chr3 153742190 153839121 
+ARHGEF26 chr3 153838792 153975616 +DHX36 chr3 153990335 154042286 +MME chr3 154741913 154901497 +C3orf33 chr3 155480401 155524140 +SLC33A1 chr3 155538813 155572218 +GMPS chr3 155588325 155661815 +KCNAB1 chr3 155755490 156256545 +SSR3 chr3 156257929 156272973 +TIPARP chr3 156391024 156424559 +CCNL1 chr3 156864297 156878549 +RSRC1 chr3 157823644 158263519 +MLF1 chr3 158288952 158325041 +GFM1 chr3 158362067 158410364 +MFSD1 chr3 158449987 158547508 +SCHIP1 chr3 159557650 159615149 +IFT80 chr3 159974774 160117668 +PPM1L chr3 160473390 160796695 +B3GALNT1 chr3 160801671 160823172 +NMD3 chr3 160822484 160971320 +BCHE chr3 165490692 165555260 +PDCD10 chr3 167401086 167452727 +SERPINI1 chr3 167453031 167543356 +GOLIM4 chr3 167726465 167813763 +EGFEM1P chr3 167967310 168548387 +MYNN chr3 169490619 169507504 +LRRIQ4 chr3 169539710 169555563 +SEC62 chr3 169684423 169716161 +PHC3 chr3 169804520 169899537 +PRKCI chr3 169940153 170023769 +SKIL chr3 170075466 170114623 +CLDN11 chr3 170136653 170578169 +SLC7A14 chr3 170177372 170303863 +RPL22L1 chr3 170582664 170588272 +TNIK chr3 170779128 171178197 +TMEM212 chr3 171561139 171656505 +NCEH1 chr3 172348039 172429008 +NLGN1 chr3 173114074 174004434 +TBL1XR1 chr3 176737143 176915261 +ZMAT3 chr3 178735011 178790067 +MFN1 chr3 179065480 179112719 +GNB4 chr3 179116990 179169378 +ACTL6A chr3 179280668 179306196 +MRPL47 chr3 179306073 179322442 +NDUFB5 chr3 179322478 179345435 +USP13 chr3 179370543 179507189 +PEX5L chr3 179512746 179754841 +TTC14 chr3 180319918 180335616 +FXR1 chr3 180585929 180700541 +DNAJC19 chr3 180701497 180707562 +SOX2-OT chr3 180707558 181554668 +SOX2 chr3 181429714 181432221 +DCUN1D1 chr3 182655862 182703741 +MCCC1 chr3 182733006 182833863 +LAMP3 chr3 182840001 182881627 +B3GNT5 chr3 182971032 183016292 +KLHL24 chr3 183353356 183402265 +YEATS2 chr3 183415606 183530413 +MAP6D1 chr3 183533664 183543382 +PARL chr3 183547173 183602721 +ABCC5 chr3 183637722 183735803 +HTR3D chr3 183749332 183757157 +EIF2B5 chr3 
183852826 184402546 +DVL3 chr3 183873176 183891398 +AP2M1 chr3 183892477 183901879 +ABCF3 chr3 183903811 183911800 +ALG3 chr3 183960089 183967336 +ECE2 chr3 183967438 184010819 +PSMD2 chr3 184016497 184026842 +EIF4G1 chr3 184032283 184053146 +FAM131A chr3 184053714 184064063 +POLR2H chr3 184079506 184086384 +EPHB3 chr3 184279572 184300197 +MAGEF1 chr3 184428156 184429836 +VPS8 chr3 184529931 184770402 +C3orf70 chr3 184795838 184870802 +MAP3K13 chr3 185000729 185206885 +TMEM41A chr3 185194284 185216845 +SENP2 chr3 185300284 185351339 +IGF2BP2 chr3 185361527 185542844 +TRA2B chr3 185633694 185655924 +ETV5 chr3 185764097 185828107 +DGKG chr3 185823457 186080026 +TBCCD1 chr3 186263862 186288332 +DNAJB11 chr3 186285192 186315061 +FETUB chr3 186353758 186370930 +EIF4A2 chr3 186500994 186507689 +RFC4 chr3 186507669 186524847 +ST6GAL1 chr3 186648274 186796341 +MASP1 chr3 186935942 187009810 +BCL6 chr3 187439165 187463515 +LPP chr3 187871072 188608460 +MIR944 chr3 189547711 189547798 +IL1RAP chr3 190231840 190375843 +CCDC50 chr3 191046866 191116459 +FGF12 chr3 191857184 192485553 +MB21D2 chr3 192514604 192635950 +ATP13A4 chr3 193119866 193310900 +OPA1 chr3 193310933 193415612 +LRRC15 chr3 194075976 194090472 +LSG1 chr3 194361517 194393206 +XXYLT1 chr3 194789008 194991896 +ACAP2 chr3 194995465 195163807 +PPP1R2 chr3 195241221 195270209 +APOD chr3 195295573 195311076 +SDHAP2 chr3 195384967 195412775 +TNK2 chr3 195590235 195638816 +SDHAP1 chr3 195686619 195717189 +TFRC chr3 195754054 195809060 +PCYT1A chr3 195941093 196014828 +TCTEX1D2 chr3 196018090 196045170 +UBXN7 chr3 196074533 196159345 +RNF168 chr3 196195654 196230639 +WDR53 chr3 196281056 196295545 +FBXO45 chr3 196295482 196315930 +PIGX chr3 196366646 196462878 +PAK2 chr3 196466728 196559518 +SENP5 chr3 196594727 196661585 +NCBP2 chr3 196662273 196669468 +DLG1 chr3 196769431 197026171 +KIAA0226 chr3 197398264 197476598 +MIR922 chr3 197401367 197401447 +FYTTD1 chr3 197464050 197514467 +LRCH3 chr3 197518097 197615307 
+RPL35A chr3 197676858 197683481 +ZNF595 chr4 53179 88099 +ZNF876P chr4 206399 249774 +ZNF141 chr4 331603 378653 +ABCA11P chr4 419224 467918 +ZNF721 chr4 419604 492945 +PIGG chr4 492989 533985 +ATP5I chr4 666225 668127 +MYL5 chr4 667369 675822 +PCGF3 chr4 699537 764428 +GAK chr4 843064 926161 +TMEM175 chr4 926175 952444 +SPON2 chr4 1160720 1202750 +CTBP1 chr4 1205236 1243741 +MAEA chr4 1283639 1333935 +UVSSA chr4 1341054 1381837 +SLBP chr4 1694527 1714282 +TMEM129 chr4 1717679 1723085 +LETM1 chr4 1813206 1857974 +WHSC1 chr4 1873151 1983934 +NELFA chr4 1984441 2043630 +C4orf48 chr4 2043689 2045697 +HAUS3 chr4 2229191 2243891 +MXD4 chr4 2249159 2264021 +MIR4800 chr4 2251804 2251883 +RNF4 chr4 2463947 2627047 +FAM193A chr4 2626988 2734292 +TNIP2 chr4 2743375 2758103 +ADD1 chr4 2845584 2931803 +MFSD10 chr4 2932288 2936586 +GRK4 chr4 2965335 3042474 +RGS12 chr4 3294755 3441640 +LRPAP1 chr4 3508103 3534286 +TMEM128 chr4 4237269 4249950 +LYAR chr4 4269428 4291896 +ZBTB49 chr4 4291924 4323513 +NSG1 chr4 4349867 4420785 +STX18 chr4 4417469 4544073 +CYTL1 chr4 5016313 5021199 +EVC chr4 5712924 5830772 +CRMP1 chr4 5749811 5894785 +WFS1 chr4 6271576 6304992 +MAN2B2 chr4 6576902 6625089 +MRFAP1 chr4 6641818 6644472 +MRFAP1L1 chr4 6709428 6711607 +BLOC1S4 chr4 6717842 6719387 +KIAA0232 chr4 6783102 6885897 +TBC1D14 chr4 6910969 7034845 +TADA2B chr4 7043626 7059679 +GRPEL1 chr4 7060633 7069924 +AFAP1-AS1 chr4 7755817 7780655 +AFAP1 chr4 7760441 7941653 +SH3TC1 chr4 8183799 8243530 +ACOX3 chr4 8368009 8442450 +USP17L10 chr4 9212383 9213975 +USP17L22 chr4 9269345 9270937 +USP17L6P chr4 9369600 9370796 +WDR1 chr4 10075963 10118573 +HS3ST1 chr4 11394774 11431389 +RAB28 chr4 13362978 13485989 +CC2D2A chr4 15471489 15603180 +FBXL5 chr4 15606162 15683302 +FAM200B chr4 15683285 15707188 +CD38 chr4 15779898 15854853 +TAPT1-AS1 chr4 16228286 16321763 +QDPR chr4 17461884 17513857 +LAP3 chr4 17578815 17609595 +MED28 chr4 17616254 17635728 +DCAF16 chr4 17802278 17812381 +LCORL chr4 17842822 
18023499 +PACRGL chr4 20697905 20754530 +GPR125 chr4 22346694 22517677 +DHX15 chr4 24519064 24586173 +ZCCHC4 chr4 25314407 25372005 +ANAPC4 chr4 25378835 25420120 +RBPJ chr4 26165077 26436541 +TBC1D19 chr4 26578059 26756973 +ARAP2 chr4 35949843 36246131 +DTHD1 chr4 36283244 36347378 +RELL1 chr4 37592422 37687998 +PGM2 chr4 37828255 37864558 +KLF3 chr4 38665817 38702663 +TLR1 chr4 38792298 38858438 +TMEM156 chr4 38968365 39034542 +KLHL5 chr4 39046659 39128477 +WDR19 chr4 39184024 39287430 +RFC1 chr4 39289076 39367995 +RPL9 chr4 39455744 39460568 +LIAS chr4 39460620 39479273 +UGDH chr4 39500375 39529931 +UGDH-AS1 chr4 39529639 39596327 +SMIM14 chr4 39547950 39640710 +UBE2K chr4 39699664 39784412 +PDS5A chr4 39824483 39979576 +MIR4802 chr4 40504057 40504136 +APBB2 chr4 40812044 41218731 +UCHL1 chr4 41258430 41270472 +LIMCH1 chr4 41361624 41702061 +TMEM33 chr4 41937137 41962589 +SLC30A9 chr4 41992489 42092474 +SHISA3 chr4 42399856 42404504 +ATP8A1 chr4 42410390 42659122 +GUF1 chr4 44680444 44702943 +GNPDA2 chr4 44684217 44728612 +GABRA4 chr4 46920917 46996424 +COMMD8 chr4 47452885 47465736 +NFXL1 chr4 47849257 47916653 +SLAIN2 chr4 48343339 48428229 +FRYL chr4 48499378 48782339 +OCIAD1 chr4 48807229 48863834 +DCUN1D4 chr4 52709166 52783003 +SGCB chr4 52886872 52904648 +SPATA18 chr4 52917497 52963458 +USP46 chr4 53457138 53525502 +DANCR chr4 53578597 53586518 +SCFD2 chr4 53739149 54232242 +FIP1L1 chr4 54243810 55161439 +LNX1-AS2 chr4 54459123 54470214 +CHIC2 chr4 54875956 54930857 +PDGFRA chr4 55095264 55164414 +SRD5A3 chr4 56212276 56239263 +TMEM165 chr4 56262124 56319564 +CLOCK chr4 56294070 56413305 +EXOC1 chr4 56719782 56771200 +AASDH chr4 57204456 57253666 +PPAT chr4 57259528 57301781 +PAICS chr4 57301907 57327534 +SRP72 chr4 57333081 57369839 +ARL9 chr4 57371509 57390614 +NOA1 chr4 57829536 57844989 +POLR2B chr4 57843888 57897334 +LPHN3 chr4 62066976 62944053 +EPHA5 chr4 66185281 66536213 +UBA6 chr4 68478370 68566897 +TMPRSS11B chr4 69092371 69111438 +YTHDC1 chr4 
69176105 69215807 +UGT2B4 chr4 70345883 70391732 +CSN1S2AP chr4 70933103 70951007 +AMTN chr4 71384257 71398459 +UTP3 chr4 71554196 71556267 +RUFY3 chr4 71569921 71673032 +GRSF1 chr4 71681499 71705662 +MOB1B chr4 71768043 71888166 +DCK chr4 71858255 71896631 +GC chr4 72607410 72669758 +COX18 chr4 73921797 73935472 +ANKRD17 chr4 73939093 74124515 +IL8 chr4 74606223 74609433 +CXCL6 chr4 74702214 74714781 +EPGN chr4 75174190 75181024 +RCHY1 chr4 76404247 76439974 +THAP6 chr4 76439156 76475683 +G3BP2 chr4 76567966 76649709 +USO1 chr4 76649777 76735382 +NAAA chr4 76831809 76862204 +SDAD1 chr4 76862103 76912115 +NUP54 chr4 77035812 77069668 +SCARB2 chr4 77079890 77135046 +FAM47E chr4 77135193 77204933 +SEPT11 chr4 77870856 77961537 +CCNI chr4 77968311 77997158 +CNOT6L chr4 78634541 78740769 +MRPL1 chr4 78783674 78873944 +PAQR3 chr4 79808281 79860592 +FGF5 chr4 81187753 81257834 +RASGEF1B chr4 82347547 82965397 +HNRNPD chr4 83273651 83295656 +ENOPH1 chr4 83351715 83382244 +SCD5 chr4 83550692 83720010 +MIR575 chr4 83674490 83674583 +SEC31A chr4 83739814 83822319 +THAP9-AS1 chr4 83814162 83822113 +THAP9 chr4 83821837 83841122 +COPS4 chr4 83955600 83996971 +COQ2 chr4 84182689 84206067 +HELQ chr4 84328496 84377009 +MRPS18C chr4 84377085 84390888 +FAM175A chr4 84382092 84444501 +WDFY3 chr4 85590704 85887544 +ARHGAP24 chr4 86396267 86923823 +MAPK10 chr4 86936276 87515284 +PTPN13 chr4 87515468 87736324 +KLHL8 chr4 88081255 88161466 +HSD17B11 chr4 88257762 88312538 +NUDT9 chr4 88343734 88380606 +SPARCL1 chr4 88394487 88452213 +DSPP chr4 88529681 88538062 +SPP1 chr4 88896819 88904562 +PKD2 chr4 88928820 88998929 +PPM1K chr4 89178772 89205921 +PYURF chr4 89442136 89444964 +HERC3 chr4 89442199 89629693 +PIGY chr4 89442724 89442940 +NAP1L5 chr4 89617066 89619386 +FAM13A chr4 89647106 90032549 +SNCA chr4 90645250 90759466 +CCSER1 chr4 91048686 92523064 +GRID2 chr4 93225550 94695707 +SMARCAD1 chr4 95128762 95212443 +PDLIM5 chr4 95373037 95589377 +RAP1GDS1 chr4 99182535 99365012 +TSPAN5 
chr4 99391518 99579780 +EIF4E chr4 99792835 99851788 +METAP1 chr4 99916771 99983964 +ADH5 chr4 99992132 100009952 +ADH1B chr4 100226121 100242558 +LAMTOR3 chr4 100799493 100815647 +DNAJB14 chr4 100817405 100867883 +H2AFZ chr4 100869243 100871545 +DDIT4L chr4 101107027 101111939 +PPP3CA chr4 101944566 102269435 +MANBA chr4 103552660 103682151 +UBE2D3 chr4 103715540 103790053 +CISD2 chr4 103790135 103810399 +SLC9B1 chr4 103806205 103940896 +SLC9B2 chr4 103941025 104006986 +CXXC4 chr4 105389469 105416058 +TET2 chr4 106067032 106200973 +PPA2 chr4 106290234 106395238 +INTS12 chr4 106603784 106817143 +GSTCD chr4 106629935 106768885 +TBCK chr4 106962756 107242652 +AIMP1 chr4 107236701 107270383 +PAPSS1 chr4 108511433 108641608 +CYP2U1 chr4 108852525 108874613 +HADH chr4 108910870 108956331 +RPL34 chr4 109541722 109551568 +OSTC chr4 109571740 109588976 +SEC24B chr4 110354928 110462052 +CASP6 chr4 110609785 110624739 +PLA2G12A chr4 110631145 110651233 +GAR1 chr4 110736666 110745893 +ELOVL6 chr4 110967002 111120355 +C4orf32 chr4 113066553 113116412 +AP1AR chr4 113152893 113191203 +LARP7 chr4 113558120 113578748 +MIR302A chr4 113569339 113569407 +ANK2 chr4 113739265 114304896 +CAMK2D chr4 114372188 114683083 +UGT8 chr4 115519611 115599380 +TRAM1L1 chr4 118004718 118006736 +SNHG8 chr4 119199864 119200978 +METTL14 chr4 119606523 119636588 +C4orf3 chr4 120218207 120225600 +PDE5A chr4 120415550 120550146 +ANXA5 chr4 122589110 122618268 +EXOSC9 chr4 122722472 122738176 +BBS7 chr4 122745595 122791652 +KIAA1109 chr4 123073488 123283913 +NUDT6 chr4 123809852 123844123 +SPATA5 chr4 123844229 124240605 +SPRY1 chr4 124317950 124324910 +ANKRD50 chr4 125585207 125633887 +HSPA4L chr4 128702976 128761888 +MFSD8 chr4 128838960 128887150 +C4orf29 chr4 128886461 128960866 +LARP1B chr4 128982423 129144086 +PGRMC2 chr4 129190397 129209984 +PHF17 chr4 129730779 129796379 +C4orf33 chr4 130014472 130037795 +PCDH10 chr4 134070470 134129356 +SLC7A11 chr4 139085251 139163503 +ELF2 chr4 139949266 
140098372 +NDUFC1 chr4 140188034 140223705 +NAA15 chr4 140222609 140341187 +SETD7 chr4 140417095 140527853 +MAML3 chr4 140637907 141075338 +SCOC chr4 141178440 141306880 +ELMOD2 chr4 141445312 141474924 +RNF150 chr4 141780961 142134031 +ZNF330 chr4 142142041 142155851 +INPP4B chr4 142944313 143768585 +USP38 chr4 144106070 144144983 +GAB1 chr4 144257915 144395721 +SMARCA5 chr4 144434616 144478639 +HHIP chr4 145567173 145666423 +ANAPC10 chr4 145888264 146019693 +ABCE1 chr4 146019084 146050331 +SMAD1 chr4 146402346 146479231 +ZNF827 chr4 146678779 146859787 +LSM6 chr4 147096837 147121152 +SLC10A7 chr4 147175127 147443123 +POU4F2 chr4 147560045 147563626 +TMEM184C chr4 148538534 148593195 +PRMT10 chr4 148558936 148605381 +DCLK2 chr4 150999426 151178609 +LRBA chr4 151185594 151936879 +RPS3A chr4 152020725 152025804 +SH3D19 chr4 152023903 152246784 +PET112 chr4 152591656 152682175 +FBXW7 chr4 153242410 153457253 +MIR4453 chr4 153457416 153460415 +ARFIP1 chr4 153701089 153839615 +TRIM2 chr4 154073494 154260472 +KIAA0922 chr4 154387498 154557863 +TLR2 chr4 154622652 154626851 +RNF175 chr4 154631277 154681387 +PLRG1 chr4 155456158 155471587 +MAP9 chr4 156263810 156298122 +CTSO chr4 156845270 156875069 +PDGFC chr4 157681606 157892546 +GLRB chr4 157997209 158093242 +GRIA2 chr4 158125334 158287227 +TMEM144 chr4 159122756 159176563 +ETFDH chr4 159593277 159630775 +PPID chr4 159630286 159644548 +RAPGEF2 chr4 160025330 160281321 +NAF1 chr4 164031225 164088073 +TMA16 chr4 164415594 164441691 +MARCH1 chr4 164445450 165305202 +FAM218A chr4 165878100 165880273 +TMEM192 chr4 165995574 166129701 +MSMO1 chr4 166248775 166264312 +CPE chr4 166282346 166419472 +SPOCK3 chr4 167654535 168155947 +DDX60L chr4 169277886 169458937 +CBR4 chr4 169784921 169931426 +SH3RF1 chr4 170015407 170192256 +NEK1 chr4 170314426 170533780 +CLCN3 chr4 170533784 170644824 +C4orf27 chr4 170650616 170679104 +MFAP3L chr4 170907748 170954182 +AADAT chr4 170981373 171012850 +GALNT7 chr4 174089904 174245118 +HMGB2 
chr4 174252846 174256276 +SCRG1 chr4 174305852 174327531 +FBXO8 chr4 175157809 175205415 +CEP44 chr4 175204828 175254531 +GLRA3 chr4 175558065 175750465 +GPM6A chr4 176554085 176923815 +SPCS3 chr4 177241115 177253396 +AGA chr4 178351924 178363657 +SNORD65 chr4 179607040 179607110 +CDKN2AIP chr4 184365744 184369351 +ING2 chr4 184426147 184432249 +RWDD4 chr4 184560788 184580378 +TRAPPC11 chr4 184580420 184634745 +STOX2 chr4 184774584 184944679 +IRF2 chr4 185308867 185395734 +CASP3 chr4 185548850 185570663 +ACSL1 chr4 185676749 185747972 +SLC25A4 chr4 186064395 186071536 +KIAA1430 chr4 186080819 186130658 +ANKRD37 chr4 186317175 186321782 +UFSP2 chr4 186320694 186347139 +CCDC110 chr4 186366336 186392913 +FAM149A chr4 187025573 187093821 +CYP4V2 chr4 187112674 187134610 +ZFP42 chr4 188916925 188926204 +FRG1 chr4 190861943 190884359 +DUX4L3 chr4 191008560 191010370 +CCDC127 chr5 196986 218330 +SDHA chr5 218356 256815 +PDCD6 chr5 271736 353971 +C5orf55 chr5 441645 443258 +EXOC3 chr5 443273 472052 +SLC9A3 chr5 473425 524447 +TPPP chr5 660883 693510 +ZDHHC11 chr5 710471 851101 +BRD9 chr5 850406 892939 +SLC6A19 chr5 1201710 1225232 +CLPTM1L chr5 1317859 1345214 +LPCAT1 chr5 1456595 1524092 +SDHAP3 chr5 1568637 1594735 +MRPL36 chr5 1798500 1801480 +NDUFS6 chr5 1801514 1816719 +MED10 chr5 6371994 6378707 +NSUN2 chr5 6599352 6633404 +SRD5A1 chr5 6633456 6669675 +MTRR chr5 7851299 7906138 +FASTKD3 chr5 7859272 7869150 +SEMA5A chr5 9035138 9546187 +FAM173B chr5 10226442 10250009 +CCT5 chr5 10250033 10266524 +CMBL chr5 10275987 10308138 +MARCH6 chr5 10353815 10440500 +DAP chr5 10679342 10761384 +CTNND2 chr5 10971952 11904155 +TRIO chr5 14143811 14532235 +FAM105A chr5 14581884 14615116 +ANKH chr5 14704910 14871887 +ZNF622 chr5 16451628 16465901 +MYO10 chr5 16665395 16936372 +BASP1 chr5 17065707 17276943 +GUSBP1 chr5 21341942 21589481 +CDH10 chr5 24487209 24645087 +CDH6 chr5 31193857 31329253 +DROSHA chr5 31400604 31532303 +C5orf22 chr5 31532373 31555165 +PDZD2 chr5 31639517 
32111037 +GOLPH3 chr5 32124810 32174456 +MTMR12 chr5 32227100 32313115 +ZFR chr5 32354456 32444867 +SUB1 chr5 32531739 32604185 +TARS chr5 33440802 33469644 +AMACR chr5 33986283 34008220 +RAD1 chr5 34905369 34919094 +BRIX1 chr5 34915481 34926101 +DNAJC21 chr5 34929698 34959069 +LMBRD2 chr5 36098514 36152063 +SKP2 chr5 36152091 36184421 +SLC1A3 chr5 36606457 36688436 +NIPBL chr5 36876861 37066515 +C5orf42 chr5 37106330 37249530 +NUP155 chr5 37288239 37371283 +WDR70 chr5 37379314 37753537 +LIFR chr5 38475065 38608456 +RICTOR chr5 38938021 39074510 +FYB chr5 39105338 39274630 +DAB2 chr5 39371780 39462402 +PTGER4 chr5 40679600 40693837 +TTC33 chr5 40714577 40756077 +PRKAA1 chr5 40759481 40798476 +RPL37 chr5 40825364 40835437 +OXCT1 chr5 41730167 41870621 +C5orf51 chr5 41904290 41921738 +FBXO4 chr5 41925356 41941845 +GHR chr5 42423879 42721979 +SEPP1 chr5 42799982 42887494 +ZNF131 chr5 43065278 43192123 +NIM1 chr5 43192173 43280952 +HMGCS1 chr5 43289497 43313614 +C5orf28 chr5 43444354 43483995 +PAIP1 chr5 43526369 43557860 +NNT chr5 43602794 43707507 +MRPS30 chr5 44809027 44820530 +PARP8 chr5 49961733 50142356 +PELO chr5 52083774 52099880 +ITGA2 chr5 52285156 52390609 +MOCS2 chr5 52391509 52405893 +NDUFS4 chr5 52856463 52979168 +ARL15 chr5 53179775 53606412 +GZMA chr5 54398476 54406080 +DHX29 chr5 54552073 54603550 +SKIV2L2 chr5 54603588 54721409 +PPAP2A chr5 54720682 54830878 +MIR5687 chr5 54804678 54804754 +SLC38A9 chr5 54921673 55069022 +IL6ST chr5 55230923 55290821 +MIER3 chr5 56215429 56267502 +GPBP1 chr5 56469775 56560506 +PLK2 chr5 57749809 57756087 +PDE4D chr5 58264865 59817947 +ERCC8 chr5 60169658 60240900 +NDUFAF2 chr5 60240956 60448853 +SMIM15 chr5 60453536 60458301 +ZSWIM6 chr5 60628100 60841997 +KIF2A chr5 61601989 61833076 +DIMT1 chr5 61683081 61699766 +IPO11 chr5 61699799 61924409 +RNF180 chr5 63461671 63668696 +CWC27 chr5 64064757 64314590 +PPWD1 chr5 64859063 64883376 +TRIM23 chr5 64885507 64921802 +TRAPPC13 chr5 64920543 64962060 +SGTB chr5 64961755 
65018862 +NLN chr5 65018023 65167553 +ERBB2IP chr5 65222303 65378377 +SREK1 chr5 65435799 65479443 +PIK3R1 chr5 67511548 67597649 +SLC30A5 chr5 68389473 68426896 +MRPS36 chr5 68513587 68525956 +CDK7 chr5 68530668 68573250 +TAF9 chr5 68646811 68665840 +RAD17 chr5 68665120 68710628 +GUSBP3 chr5 68790040 69006341 +GTF2H2C chr5 68856035 68890550 +SERF1B chr5 69321074 69338940 +SMN2 chr5 69345350 69374349 +GTF2H2B chr5 69711179 69743885 +SMN1 chr5 70220768 70249769 +NAIP chr5 70264310 70320941 +GTF2H2 chr5 70330784 70363516 +GUSBP9 chr5 70435716 70585611 +BDP1 chr5 70751442 70863649 +MCCC2 chr5 70883115 70954531 +CARTPT chr5 71014990 71016875 +MAP1B chr5 71403061 71505395 +MRPS27 chr5 71515236 71616473 +PTCD2 chr5 71616194 71656052 +TNPO1 chr5 72112139 72212560 +FCHO2 chr5 72251808 72386349 +TMEM174 chr5 72469022 72470970 +BTF3 chr5 72794233 72801460 +ANKRA2 chr5 72848160 72861511 +UTP15 chr5 72861268 72877794 +HEXB chr5 73935848 74018472 +GFM2 chr5 74017029 74063196 +NSA2 chr5 74062817 74072737 +FAM169A chr5 74073399 74162776 +HMGCR chr5 74632154 74657929 +COL4A3BP chr5 74664311 74807963 +POLK chr5 74807581 74896969 +POC5 chr5 74969949 75013313 +F2RL2 chr5 75911307 75919259 +F2R chr5 76011868 76031606 +AGGF1 chr5 76325076 76361059 +PDE8B chr5 76506274 76725632 +WDR41 chr5 76721795 76916436 +TBCA chr5 76986991 77164604 +AP3B1 chr5 77296349 77590579 +SCAMP1 chr5 77656339 77776562 +LHFPL2 chr5 77781038 78065844 +ARSB chr5 78073032 78281910 +JMY chr5 78532012 78623038 +PAPD4 chr5 78907943 78982471 +MTX3 chr5 79275584 79287082 +THBS4 chr5 79287134 79379110 +SERINC5 chr5 79407050 79551898 +ZFYVE16 chr5 79703832 79775169 +ANKRD34B chr5 79852574 79866307 +MTRNR2L2 chr5 79945819 79946855 +MSH3 chr5 79950467 80172279 +ZCCHC9 chr5 80597409 80609116 +SSBP2 chr5 80708840 81047616 +ATG10 chr5 81267844 81572676 +RPS23 chr5 81569177 81574396 +TMEM167A chr5 82348665 82373682 +XRCC4 chr5 82373317 82649606 +VCAN chr5 82767284 82878122 +HAPLN1 chr5 82933624 83017432 +EDIL3 chr5 83236373 
83680611 +COX7C chr5 85913721 85916779 +MIR4280 chr5 86410696 86410771 +RASA1 chr5 86563705 86687748 +CCNH chr5 86687311 86708836 +TMEM161B chr5 87485450 87565293 +TMEM161B-AS1 chr5 87564712 87732502 +LINC00461 chr5 87803363 87986858 +MEF2C chr5 88013975 88199922 +CETN3 chr5 89688078 89705603 +MBLAC2 chr5 89754020 89770585 +POLR3G chr5 89767565 89810370 +LYSMD3 chr5 89811428 89825401 +ARRDC3 chr5 90664541 90679176 +FAM172A chr5 92953775 93447404 +MCTP1 chr5 94039446 94620279 +TTC37 chr5 94799599 94890711 +RHOBTB3 chr5 95049226 95160087 +GLRX chr5 95087023 95158709 +ELL2 chr5 95220802 95297775 +CAST chr5 95860971 96115299 +ERAP1 chr5 96096521 96143803 +LNPEP chr5 96271098 96373219 +LIX1 chr5 96427574 96478576 +RIOK2 chr5 96496571 96518964 +RGMB chr5 98104354 98134347 +CHD1 chr5 98190908 98262240 +FAM174A chr5 99871009 99922445 +ST8SIA4 chr5 100142639 100238970 +GIN1 chr5 102421704 102455855 +PPIP5K2 chr5 102455853 102548500 +C5orf30 chr5 102594403 102614361 +NUDT12 chr5 102884556 102898494 +RAB9BP1 chr5 104435174 104435799 +FER chr5 108083523 108532542 +PJA2 chr5 108670410 108745695 +SLC25A46 chr5 110073837 110100857 +WDR36 chr5 110427414 110466200 +STARD4 chr5 110831731 110848288 +NREP chr5 110998318 111333161 +EPB41L4A chr5 111478138 111755013 +EPB41L4A-AS1 chr5 111496223 111499973 +APC chr5 112043195 112181936 +SRP19 chr5 112196919 112205485 +REEP5 chr5 112212084 112258236 +DCP2 chr5 112312399 112356667 +YTHDC2 chr5 112849380 112930982 +KCNN2 chr5 113696642 113832337 +PGGT1B chr5 114546527 114598569 +CCDC112 chr5 114602885 114632528 +FEM1C chr5 114856608 114880591 +TMED7 chr5 114949205 114968689 +ATG12 chr5 115163893 115177555 +AP3S1 chr5 115177178 115249778 +COMMD10 chr5 115420688 115748459 +SEMA6A chr5 115779312 115910630 +DMXL1 chr5 118373467 118584833 +HSD17B4 chr5 118788138 118972894 +SRFBP1 chr5 121297656 121411265 +ZNF474 chr5 121465208 121515312 +SNX2 chr5 122110691 122165803 +SNX24 chr5 122179134 122365049 +CSNK1G3 chr5 122847793 122952739 +ZNF608 chr5 
123972608 124084500 +GRAMD3 chr5 125695824 125832186 +ALDH7A1 chr5 125877533 125931110 +PHAX chr5 125935960 125962944 +MARCH3 chr5 126203406 126366500 +PRRC1 chr5 126853301 126890781 +SLC12A2 chr5 127419458 127525380 +FBN2 chr5 127593601 127994878 +ISOC1 chr5 128430444 128449721 +HINT1 chr5 130494720 130507428 +LYRM7 chr5 130506503 130541119 +CDC42SE2 chr5 130581186 130734140 +RAPGEF6 chr5 130759614 130970929 +FNIP1 chr5 130977407 131132710 +ACSL6 chr5 131142683 131347936 +SLC22A5 chr5 131705444 131731306 +C5orf56 chr5 131746328 131811736 +IRF1 chr5 131817301 131826490 +RAD50 chr5 131891711 131980313 +KIF3A chr5 132028320 132073330 +SEPT8 chr5 132086509 132142933 +SOWAHA chr5 132149033 132152488 +UQCRQ chr5 132202252 132203723 +LEAP2 chr5 132208014 132210738 +AFF4 chr5 132211071 132299326 +ZCCHC10 chr5 132332677 132362296 +HSPA4 chr5 132387654 132442141 +C5orf15 chr5 133291201 133304478 +VDAC1 chr5 133307606 133340824 +TCF7 chr5 133450402 133487556 +SKP1 chr5 133484633 133512729 +PPP2CA chr5 133530025 133561833 +CDKL3 chr5 133541305 133706738 +UBE2B chr5 133706870 133727683 +CDKN2AIPNL chr5 133737778 133747589 +SAR1B chr5 133936834 133984961 +SEC24A chr5 133984479 134063513 +CAMLG chr5 134074191 134087847 +DDX46 chr5 134094469 134190823 +C5orf24 chr5 134181370 134195427 +TXNDC15 chr5 134209493 134237215 +PCBD2 chr5 134240596 134343649 +MIR4461 chr5 134263729 134263802 +H2AFY chr5 134669590 134735604 +TGFBI chr5 135364584 135399507 +SMAD5 chr5 135468534 135524435 +SPOCK1 chr5 136310987 136934068 +HNRNPA0 chr5 137087075 137090039 +MYOT chr5 137203480 137223540 +FAM13B chr5 137273649 137387650 +BRD8 chr5 137475455 137514675 +CDC23 chr5 137523339 137549032 +FAM53C chr5 137667624 137685416 +KDM3B chr5 137688285 137772717 +REEP2 chr5 137774706 137782658 +EGR1 chr5 137801179 137805004 +ETF1 chr5 137841784 137878989 +HSPA9 chr5 137890571 137911133 +CTNNA1 chr5 137946656 138270723 +LRRTM2 chr5 138204612 138211057 +SIL1 chr5 138282409 138629246 +MATR3 chr5 138609441 
138667360 +SNORA74A chr5 138614470 138614667 +PAIP2 chr5 138677276 138705406 +DNAJC18 chr5 138744279 138780180 +TMEM173 chr5 138855119 138862520 +UBE2D2 chr5 138906016 139008018 +CXXC5 chr5 139026884 139063467 +PSD2 chr5 139175406 139224051 +PURA chr5 139487362 139496321 +IGIP chr5 139505521 139508391 +CYSTM1 chr5 139554227 139661637 +PFDN1 chr5 139624624 139682706 +HBEGF chr5 139712428 139726216 +ANKHD1 chr5 139781399 139929163 +ANKHD1-EIF4EBP3 chr5 139781493 139929154 +SRA1 chr5 139916925 139937895 +EIF4EBP3 chr5 139927251 139929163 +APBB3 chr5 139937853 139973337 +SLC35A4 chr5 139944041 139948688 +CD14 chr5 140011313 140013286 +NDUFA2 chr5 140018325 140027370 +TMCO6 chr5 140019012 140024993 +IK chr5 140026643 140042064 +WDR55 chr5 140044261 140053709 +DND1 chr5 140050379 140053171 +HARS chr5 140052758 140071609 +HARS2 chr5 140071011 140078889 +ZMAT2 chr5 140078265 140086248 +PCDHA3 chr5 140180783 140391929 +PCDHA13 chr5 140261793 140391929 +PCDHB7 chr5 140552243 140555957 +PCDHB8 chr5 140557371 140560081 +PCDHB16 chr5 140560980 140565793 +PCDHB9 chr5 140566893 140571111 +PCDHB11 chr5 140579183 140582618 +PCDHB12 chr5 140588269 140591696 +PCDHB13 chr5 140593509 140596993 +PCDHB14 chr5 140602931 140605858 +PCDHB19P chr5 140619518 140621864 +TAF7 chr5 140698057 140700330 +PCDHGB1 chr5 140729828 140892546 +PCDHGB2 chr5 140739703 140892546 +PCDHGA5 chr5 140743898 140892546 +PCDHGA7 chr5 140762467 140892546 +PCDHGB4 chr5 140767452 140892546 +PCDHGA10 chr5 140792743 140892546 +PCDHGB7 chr5 140797427 140892546 +PCDHGA11 chr5 140800762 140891835 +PCDHGC3 chr5 140855580 140892542 +PCDHGC4 chr5 140864741 140892546 +DIAPH1 chr5 140894583 140998622 +HDAC3 chr5 141000443 141016437 +ARAP3 chr5 141032968 141061788 +PCDH1 chr5 141232938 141258811 +KIAA0141 chr5 141303373 141321612 +RNF14 chr5 141337893 141369856 +GNPDA1 chr5 141371314 141392606 +NDFIP1 chr5 141488070 141534005 +FGF1 chr5 141971743 142077617 +ARHGAP26-AS1 chr5 142239169 142248487 +NR3C1 chr5 142657496 142815077 
+YIPF5 chr5 143537723 143550278 +LARS chr5 145492601 145562223 +RBM27 chr5 145583113 145718814 +TCERG1 chr5 145826874 145891524 +PPP2R2B chr5 145967936 146464347 +DPYSL3 chr5 146770374 146889619 +JAKMIP2 chr5 146967990 147162338 +SPINK1 chr5 147204131 147211349 +FBXO38 chr5 147763498 147822399 +HTR4 chr5 147830595 148056798 +ADRB2 chr5 148206156 148208196 +GRPEL2 chr5 148724993 148734146 +PCYOX1L chr5 148737570 148749216 +MIR145 chr5 148809849 148812397 +CSNK1A1 chr5 148871760 148931007 +HMGXB3 chr5 149379884 149432386 +CSF1R chr5 149432854 149492935 +CDX1 chr5 149546358 149564120 +TCOF1 chr5 149737202 149779871 +CD74 chr5 149781200 149792492 +RPS14 chr5 149822753 149829319 +NDST1 chr5 149865381 149937773 +RBM22 chr5 150070356 150080669 +DCTN4 chr5 150088002 150138671 +ZNF300 chr5 150273954 150284545 +ZNF300P1 chr5 150310207 150325851 +ANXA6 chr5 150480273 150537443 +GM2A chr5 150591711 150650001 +SLC36A1 chr5 150816607 150871942 +SPARC chr5 151040657 151066726 +ATOX1 chr5 151121877 151152093 +G3BP1 chr5 151150606 151192346 +GRIA1 chr5 152869175 153193429 +FAM114A2 chr5 153369688 153418496 +MFAP3 chr5 153418466 153600038 +GALNT10 chr5 153570290 153800544 +LARP1 chr5 154092462 154197167 +CNOT8 chr5 154237113 154256353 +MRPL22 chr5 154320630 154348971 +HAVCR2 chr5 156512843 156569880 +MED7 chr5 156564423 156586030 +CYFIP2 chr5 156693089 156822606 +THG1L chr5 157158205 157168456 +CLINT1 chr5 157212751 157286183 +RNF145 chr5 158584417 158637061 +UBLCP1 chr5 158690089 158713044 +TTC1 chr5 159436120 159492550 +PWWP2A chr5 159488808 159546430 +SLU7 chr5 159828648 159848718 +PTTG1 chr5 159848829 159855748 +GABRG2 chr5 161494546 161582542 +CCNG1 chr5 162864575 162873157 +NUDCD2 chr5 162873532 162887146 +MAT2B chr5 162930120 162946342 +RARS chr5 167913450 167946304 +PANK3 chr5 167975500 168006605 +MIR218-2 chr5 168195151 168195260 +SPDL1 chr5 169010638 169031782 +LCP2 chr5 169673241 169725231 +KCNMB1 chr5 169801675 169816681 +RANBP17 chr5 170288874 170727019 +NPM1 chr5 
170814120 170838141 +FBXW11 chr5 171288553 171433877 +EFCAB9 chr5 171621176 171630456 +UBTD2 chr5 171636644 171711075 +DUSP1 chr5 172195093 172198198 +ERGIC1 chr5 172261278 172379688 +RPL26L1 chr5 172385732 172396774 +ATP6V0E1 chr5 172410760 172462448 +CREBRF chr5 172483355 172566291 +BNIP1 chr5 172571445 172591390 +BOD1 chr5 173034517 173043663 +CPEB4 chr5 173315283 173388979 +SFXN1 chr5 174904065 174956745 +CPLX2 chr5 175223313 175311023 +THOC3 chr5 175344876 175461683 +KIAA1191 chr5 175773064 175788971 +NOP16 chr5 175810949 175815976 +HIGD2A chr5 175815748 175816772 +CLTB chr5 175819456 175843570 +FAF2 chr5 175874629 175937075 +RNF44 chr5 175953698 175965026 +HK3 chr5 176307870 176326333 +UIMC1 chr5 176332006 176449634 +ZNF346 chr5 176449697 176508190 +NSD1 chr5 176560026 176727216 +RAB24 chr5 176728199 176730745 +PRELID1 chr5 176730775 176733960 +LMAN2 chr5 176758563 176778853 +SLC34A1 chr5 176806236 176825849 +DBN1 chr5 176883609 176901402 +PDLIM7 chr5 176910395 176924607 +DDX41 chr5 176938578 176944470 +FAM193B chr5 176946789 176981542 +TMED9 chr5 177019159 177023125 +B4GALT7 chr5 177027101 177037348 +RMND5B chr5 177557997 177577566 +NHP2 chr5 177576461 177580968 +HNRNPAB chr5 177631508 177638164 +CLK4 chr5 178029665 178057616 +ZNF354A chr5 178138593 178157703 +ZNF354B chr5 178286954 178315123 +ZFP2 chr5 178322895 178360213 +ZNF354C chr5 178487416 178510538 +RUFY1 chr5 178977559 179037027 +HNRNPH1 chr5 179041179 179061785 +CANX chr5 179105629 179157926 +MAML1 chr5 179159851 179223512 +LTC4S chr5 179220981 179223648 +MGAT4B chr5 179224597 179233952 +MIR1229 chr5 179225278 179225346 +SQSTM1 chr5 179233388 179265078 +C5orf45 chr5 179261436 179289173 +TBC1D9B chr5 179289066 179334859 +RNF130 chr5 179338651 179499118 +RASGEF1C chr5 179527795 179636153 +MAPK9 chr5 179660143 179719099 +CNOT6 chr5 179921412 180005405 +SCGB3A1 chr5 180017103 180018540 +MGAT1 chr5 180217541 180242652 +LINC00847 chr5 180257957 180262726 +ZFP62 chr5 180274611 180288286 +OR2V2 chr5 
180581943 180582890 +GNB2L1 chr5 180663909 180675096 +TRIM52 chr5 180681417 180688119 +IRF4 chr6 391739 411447 +EXOC2 chr6 485133 693117 +GMDS chr6 1624041 2245926 +SERPINB1 chr6 2832566 2842240 +SERPINB9 chr6 2887500 2903514 +SERPINB6 chr6 2948393 2972090 +NQO2 chr6 2988221 3019996 +BPHL chr6 3118608 3153812 +TUBB2A chr6 3153903 3157760 +TUBB2B chr6 3224495 3231964 +PSMG4 chr6 3231637 3303607 +PXDC1 chr6 3722848 3752260 +PRPF4B chr6 4021501 4065217 +ECI2 chr6 4115923 4135831 +CDYL chr6 4706393 4955785 +RPP40 chr6 4994966 5004297 +PPP1R3G chr6 5085720 5087455 +LYRM4 chr6 5102827 5261172 +FARS2 chr6 5261277 5771813 +LY86 chr6 6588341 6655216 +SSR1 chr6 7268539 7347679 +CAGE1 chr6 7326889 7389976 +RIOK1 chr6 7389729 7418270 +SNRNP48 chr6 7590432 7612200 +BLOC1S5 chr6 8013800 8064647 +EEF1E1 chr6 8073593 8102811 +SLC35B3 chr6 8413301 8435794 +GCNT2 chr6 10492456 10629601 +PAK1IP1 chr6 10694928 10710015 +TMEM14C chr6 10723148 10731362 +TMEM14B chr6 10747992 10852986 +SYCP2L chr6 10748027 10979553 +NEDD9 chr6 11183531 11382581 +HIVEP1 chr6 12008995 12165232 +PHACTR1 chr6 12717893 13288645 +TBC1D7 chr6 13266774 13328815 +SIRT5 chr6 13574816 13614790 +NOL7 chr6 13615559 13632971 +RANBP9 chr6 13621730 13711796 +CD83 chr6 14117872 14137149 +JARID2 chr6 15246527 15522252 +DTNBP1 chr6 15523032 15663289 +MYLIP chr6 16129356 16148479 +ATXN1 chr6 16299343 16761722 +CAP2 chr6 17393447 17558023 +FAM8A1 chr6 17600586 17611950 +NUP153 chr6 17615269 17706656 +KIF13A chr6 17759414 17987854 +TPMT chr6 18128542 18155305 +DEK chr6 18224099 18265054 +MBOAT1 chr6 20100935 20212670 +E2F3 chr6 20402398 20493941 +CDKAL1 chr6 20534688 21232635 +SOX4 chr6 21593972 21598847 +NRSN1 chr6 24126350 24155128 +MRS2 chr6 24403153 24425810 +GPLD1 chr6 24424793 24495433 +ALDH5A1 chr6 24495080 24537435 +TDP2 chr6 24650205 24667261 +ACOT13 chr6 24667263 24705293 +C6orf62 chr6 24705089 24721064 +GMNN chr6 24775159 24786327 +HIST1H2AA chr6 25726291 25726790 +HIST1H4A chr6 26021907 26022278 +HIST1H1C chr6 
26055968 26056699 +HIST1H4C chr6 26104104 26104518 +HIST1H2BC chr6 26115101 26124154 +HIST1H2AC chr6 26124373 26139344 +HIST1H1E chr6 26156559 26157343 +HIST1H2BD chr6 26158349 26171577 +HIST1H4E chr6 26204858 26206266 +HIST1H2BG chr6 26216428 26216872 +HIST1H4H chr6 26281283 26285762 +BTN3A2 chr6 26365387 26378546 +BTN3A1 chr6 26402465 26415444 +BTN3A3 chr6 26440700 26453643 +BTN2A1 chr6 26458150 26476849 +HMGN4 chr6 26538633 26546482 +ABT1 chr6 26597180 26600278 +ZNF322 chr6 26636518 26659980 +GUSBP2 chr6 26839263 26924333 +HIST1H2BK chr6 27106073 27114619 +PRSS16 chr6 27215480 27224403 +ZNF184 chr6 27418522 27440897 +HIST1H2BM chr6 27782822 27783267 +HIST1H2AM chr6 27860477 27860963 +ZSCAN16 chr6 28092338 28097860 +ZKSCAN8 chr6 28109688 28127250 +ZSCAN9 chr6 28192664 28201260 +ZKSCAN4 chr6 28212401 28227011 +PGBD1 chr6 28249314 28270326 +ZKSCAN3 chr6 28317691 28336947 +ZSCAN12 chr6 28346732 28367511 +TRIM27 chr6 28870779 28891766 +OR11A1 chr6 29393281 29424848 +GABBR1 chr6 29523406 29601753 +MOG chr6 29624758 29640149 +HLA-F chr6 29690552 29706305 +HLA-H chr6 29855350 29858259 +HLA-A chr6 29909037 29913661 +HLA-J chr6 29974360 29977733 +ZNRD1 chr6 30026676 30032686 +PPP1R11 chr6 30034486 30038110 +TRIM26 chr6 30152232 30181204 +HLA-L chr6 30227361 30260791 +HCG18 chr6 30255174 30294927 +TRIM39 chr6 30294256 30311506 +RPP21 chr6 30312908 30314661 +HLA-E chr6 30457244 30461982 +GNL1 chr6 30509154 30524951 +PRR3 chr6 30524663 30531500 +ABCF1 chr6 30539153 30564956 +PPP1R10 chr6 30568177 30586389 +MRPS18B chr6 30585486 30594172 +ATAT1 chr6 30594619 30614600 +C6orf136 chr6 30614816 30620987 +DHX16 chr6 30620896 30640814 +NRM chr6 30655824 30659197 +MDC1 chr6 30667584 30685666 +TUBB chr6 30687978 30693203 +FLOT1 chr6 30695486 30710510 +IER3 chr6 30710976 30712331 +DDR1 chr6 30844198 30867933 +GTF2H4 chr6 30875961 30881883 +VARS2 chr6 30876019 30894236 +C6orf15 chr6 31079000 31080336 +CCHCR1 chr6 31110216 31126015 +POU5F1 chr6 31132119 31148508 +HLA-C chr6 31236526 
31239907 +HLA-B chr6 31321649 31324965 +DDX39B chr6 31497996 31510225 +ATP6V1G2 chr6 31512239 31516204 +NFKBIL1 chr6 31514647 31526606 +TNF chr6 31543344 31546113 +LST1 chr6 31553901 31556686 +AIF1 chr6 31582961 31584798 +PRRC2A chr6 31588497 31605548 +BAG6 chr6 31606805 31620482 +C6orf47 chr6 31626075 31628549 +GPANK1 chr6 31629006 31634060 +CSNK2B chr6 31633013 31638120 +LY6G5B chr6 31637944 31641553 +LY6G5C chr6 31644461 31651817 +ABHD16A chr6 31654726 31671221 +C6orf25 chr6 31686371 31694491 +DDAH2 chr6 31694815 31698394 +CLIC1 chr6 31698358 31707540 +MSH5 chr6 31707725 31732622 +VARS chr6 31745295 31763730 +LSM2 chr6 31765173 31774761 +HSPA1L chr6 31777396 31783437 +HSPA1A chr6 31783291 31785723 +HSPA1B chr6 31795512 31798031 +C6orf48 chr6 31802385 31807541 +NEU1 chr6 31825436 31830683 +EHMT2 chr6 31847536 31865464 +C2 chr6 31865562 31913449 +ZBTB12 chr6 31867384 31869769 +NELFE chr6 31919864 31926887 +SKIV2L chr6 31926857 31937532 +STK19 chr6 31938868 31950598 +CYP21A2 chr6 32006042 32009447 +ATF6B chr6 32065953 32096030 +PPT2 chr6 32121218 32134011 +PPT2-EGFL8 chr6 32121622 32139755 +EGFL8 chr6 32132360 32136058 +AGPAT1 chr6 32135989 32145873 +AGER chr6 32148745 32152101 +HLA-DRA chr6 32407619 32412823 +HLA-DRB5 chr6 32485120 32498064 +HLA-DRB1 chr6 32546546 32557625 +HLA-DQA1 chr6 32595956 32614839 +HLA-DQB1 chr6 32627244 32636160 +TAP2 chr6 32789610 32806557 +PSMB8 chr6 32808494 32812480 +PSMB9 chr6 32811913 32827362 +TAP1 chr6 32812986 32821755 +HLA-DMB chr6 32902406 32908847 +HLA-DMA chr6 32916390 32936871 +BRD2 chr6 32936437 32949282 +HLA-DPA1 chr6 33032346 33048552 +HLA-DPB1 chr6 33043703 33054978 +COL11A2 chr6 33130458 33160276 +RXRB chr6 33161365 33168630 +SLC39A7 chr6 33168222 33172216 +HSD17B8 chr6 33172419 33174608 +RING1 chr6 33176272 33180499 +VPS52 chr6 33218049 33239824 +RPS18 chr6 33239787 33244287 +B3GALT4 chr6 33244917 33252609 +WDR46 chr6 33246885 33257304 +PFDN6 chr6 33257079 33266178 +RGL2 chr6 33259431 33267101 +TAPBP chr6 33267471 
33282164 +DAXX chr6 33286335 33297046 +PHF1 chr6 33378176 33384230 +CUTA chr6 33384219 33386094 +ZBTB9 chr6 33422356 33425325 +BAK1 chr6 33540329 33548019 +GGNBP1 chr6 33551515 33556803 +LEMD2 chr6 33738979 33756913 +HMGA1 chr6 34204650 34214008 +C6orf1 chr6 34214157 34217247 +NUDT3 chr6 34247456 34360451 +RPS10-NUDT3 chr6 34256547 34393825 +RPS10 chr6 34385231 34393902 +C6orf106 chr6 34555065 34664636 +SNRPC chr6 34725183 34741571 +UHRF1BP1 chr6 34759857 34850915 +TAF11 chr6 34845555 34855866 +TCP11 chr6 35085848 35116387 +ZNF76 chr6 35226686 35263762 +DEF6 chr6 35265595 35289548 +PPARD chr6 35310335 35395968 +RPL10A chr6 35436185 35438562 +FKBP5 chr6 35541362 35696360 +MIR5690 chr6 35632494 35632566 +SRPK1 chr6 35800743 35889119 +MAPK14 chr6 35995488 36079013 +BRPF3 chr6 36164521 36200567 +KCTD20 chr6 36410544 36458920 +STK38 chr6 36461669 36515247 +SRSF3 chr6 36562145 36573377 +CDKN1A chr6 36644305 36655116 +RAB44 chr6 36683256 36699870 +CPNE5 chr6 36708552 36807778 +PPIL1 chr6 36822603 36842800 +C6orf89 chr6 36839646 36896740 +MTCH1 chr6 36935917 36954074 +FGD2 chr6 36973422 36996846 +PIM1 chr6 37137979 37143202 +RNF8 chr6 37321748 37362514 +CCDC167 chr6 37450696 37467698 +ZFAND3 chr6 37787275 38122400 +BTBD9 chr6 38136227 38607924 +GLO1 chr6 38643701 38670917 +SAYSD1 chr6 39071840 39082965 +KCNK5 chr6 39156749 39197226 +MOCS1 chr6 39867354 39902290 +OARD1 chr6 41001366 41065526 +TSPO2 chr6 41010293 41012076 +NFYA chr6 41040684 41067715 +TREM2 chr6 41126244 41130924 +TREM1 chr6 41235664 41254457 +TOMM6 chr6 41755400 41757636 +USP49 chr6 41757634 41863099 +MED20 chr6 41873092 41888877 +BYSL chr6 41888926 41900784 +CCND3 chr6 41902671 42018095 +TAF8 chr6 42018251 42055199 +MRPS10 chr6 42174539 42185603 +TRERF1 chr6 42192669 42419789 +UBR2 chr6 42531800 42661242 +TBCC chr6 42712219 42714558 +RPL7L1 chr6 42847356 42857663 +C6orf226 chr6 42858005 42858554 +CNPY3 chr6 42896938 42907025 +PEX6 chr6 42931608 42946958 +PPP2R5D chr6 42952237 42980080 +MEA1 chr6 42979832 
42981706 +KLHDC3 chr6 42981951 42989036 +RRP36 chr6 42989383 43001894 +KLC4 chr6 43008515 43042837 +MRPL2 chr6 43021767 43027544 +CUL9 chr6 43149913 43192325 +DNPH1 chr6 43193367 43197222 +ABCC10 chr6 43395104 43418168 +TJAP1 chr6 43445261 43474294 +POLR1C chr6 43477440 43497323 +YIPF3 chr6 43479565 43484728 +XPO5 chr6 43490072 43543812 +POLH chr6 43543887 43586701 +MAD2L1BP chr6 43597277 43608689 +MRPS18A chr6 43639040 43655528 +MRPL14 chr6 44081194 44095194 +TMEM63B chr6 44094651 44123256 +SLC29A1 chr6 44187242 44201888 +HSP90AB1 chr6 44214824 44221620 +SLC35B2 chr6 44221833 44225291 +AARS2 chr6 44267391 44281063 +CDC5L chr6 44355262 44418163 +SUPT3H chr6 44777054 45345690 +ENPP4 chr6 46097730 46114436 +ENPP5 chr6 46126924 46138708 +RCAN2 chr6 46188475 46459709 +SLC25A27 chr6 46620678 46645930 +PLA2G7 chr6 46671938 46703430 +TNFRSF21 chr6 47199268 47277641 +CD2AP chr6 47445525 47594999 +PTCHD4 chr6 47845764 48036425 +MUT chr6 49398073 49430904 +CENPQ chr6 49431091 49460820 +CRISP2 chr6 49660073 49681274 +TFAP2B chr6 50786436 50815326 +MCM3 chr6 52128807 52149679 +PAQR8 chr6 52226219 52272575 +EFHC1 chr6 52285106 52387892 +TMEM14A chr6 52535907 52551386 +GSTA4 chr6 52842751 52860176 +ICK chr6 52866077 52926600 +ELOVL5 chr6 53132196 53213947 +RPS16P5 chr6 53201741 53202144 +GCLC chr6 53362139 53481768 +LRRC1 chr6 53659295 53788919 +HCRTR2 chr6 55039050 55147418 +DST chr6 56322785 56819426 +BEND6 chr6 56819773 56892140 +KIAA1586 chr6 56911347 56920023 +ZNF451 chr6 56951642 57035105 +BAG2 chr6 57037124 57054631 +RAB23 chr6 57053607 57087078 +PRIM2 chr6 57179603 57513375 +GUSBP4 chr6 58246051 58256569 +KHDRBS2 chr6 62389865 62996132 +PTP4A1 chr6 64231666 64293492 +PHF3 chr6 64345725 64489229 +BAI3 chr6 69345259 70099403 +LMBRD1 chr6 70385694 70507003 +FAM135A chr6 71122644 71270877 +C6orf57 chr6 71276620 71299272 +SMAP1 chr6 71377479 71571718 +MIR30A chr6 72113254 72113324 +DDX43 chr6 74104471 74127292 +MTO1 chr6 74171301 74218959 +EEF1A1 chr6 74225473 74233520 
+SLC17A5 chr6 74303102 74363878 +COX7A2 chr6 75947391 75960039 +TMEM30A chr6 75962640 75994684 +SENP6 chr6 76311225 76427997 +MYO6 chr6 76458909 76629254 +PHIP chr6 79645584 79787953 +HMGN3 chr6 79910962 79944406 +SH3BGRL2 chr6 80341000 80413372 +TTK chr6 80713604 80752244 +BCKDHB chr6 80816364 81055987 +IBTK chr6 82879700 82957471 +UBE3D chr6 83602117 83775560 +DOPEY1 chr6 83777385 83881069 +PGM3 chr6 83870869 83903655 +RWDD2A chr6 83903098 83908651 +PRSS35 chr6 84222194 84235423 +RIPPLY2 chr6 84562985 84567234 +CYB5R4 chr6 84569362 84677143 +SNX14 chr6 86215214 86303874 +SYNCRIP chr6 86318053 86353510 +SNHG5 chr6 86370710 86388451 +ZNF292 chr6 87862551 87973914 +SMIM8 chr6 88032301 88052043 +SLC35A1 chr6 88180341 88222054 +RARS2 chr6 88224096 88299721 +ORC3 chr6 88299839 88377169 +AKIRIN2 chr6 88384790 88411927 +CNR1 chr6 88849583 88876078 +RNGTT chr6 89319985 89673348 +PNRC1 chr6 89790470 89794879 +SRSF12 chr6 89805678 89827800 +PM20D2 chr6 89855769 89875284 +GABRR2 chr6 89966927 90025018 +UBE2J1 chr6 90036344 90062567 +RRAGD chr6 90074355 90121989 +ANKRD6 chr6 90142889 90343553 +LYRM2 chr6 90277863 90348472 +CASP8AP2 chr6 90539613 90584155 +MAP3K7 chr6 91223292 91296764 +MANEA chr6 96025419 96057333 +FUT9 chr6 96463860 96663488 +UFL1 chr6 96969471 97003152 +NDUFAF4 chr6 97337189 97345757 +KLHL32 chr6 97372605 97588630 +MMS22L chr6 97590037 97731093 +POU3F2 chr6 99282580 99286660 +FBXL4 chr6 99316420 99395849 +COQ3 chr6 99817276 99842080 +PNISR chr6 99845927 99873207 +CCNC chr6 99990256 100016849 +ASCC3 chr6 100956070 101329248 +GRIK2 chr6 101846664 102517958 +HACE1 chr6 105175968 105307794 +BVES-AS1 chr6 105584183 105617820 +PREP chr6 105725440 105850959 +PRDM1 chr6 106534195 106557814 +ATG5 chr6 106632351 106773666 +RTN4IP1 chr6 107018903 107077373 +QRSL1 chr6 107077453 107116292 +C6orf203 chr6 107349407 107372546 +PDSS2 chr6 107473761 107780768 +SEC63 chr6 108188960 108279393 +OSTM1 chr6 108362613 108487058 +SNX3 chr6 108532426 108582464 +FOXO3 chr6 108881038 
109005977 +ARMC2 chr6 109169619 109295186 +SESN1 chr6 109307640 109416022 +CEP57L1 chr6 109416313 109485135 +CD164 chr6 109687717 109703762 +MICAL1 chr6 109765265 109787171 +ZBTB24 chr6 109783797 109804440 +FIG4 chr6 110012499 110146631 +GPR6 chr6 110299514 110301921 +WASF1 chr6 110421022 110501207 +CDC40 chr6 110501344 110575478 +AMD1 chr6 111195973 111216916 +GTF3C6 chr6 111279763 111289093 +RPF2 chr6 111303218 111349466 +GSTM2P1 chr6 111368071 111368724 +KIAA1919 chr6 111580551 111592370 +REV3L chr6 111620234 111804918 +TRAF3IP2-AS1 chr6 111804714 111919505 +TRAF3IP2 chr6 111877657 111927481 +FYN chr6 111981535 112194655 +TUBE1 chr6 112391980 112408732 +FAM229B chr6 112408802 112423993 +LAMA4 chr6 112429963 112576141 +MARCKS chr6 114178541 114184648 +HDAC2 chr6 114254192 114332472 +NT5DC1 chr6 116422012 116570660 +TSPYL4 chr6 116571151 116575261 +DSE chr6 116575336 116762424 +TSPYL1 chr6 116597741 116601066 +RWDD1 chr6 116892530 116918838 +ZUFSP chr6 116956781 116989957 +KPNA5 chr6 117002350 117063029 +GPRC6A chr6 117113248 117150200 +GOPC chr6 117639374 117923691 +NUS1 chr6 117996665 118031803 +SLC35F1 chr6 118228689 118638839 +CEP85L chr6 118781935 119031238 +PLN chr6 118869461 118881893 +MCM9 chr6 119134605 119256327 +ASF1A chr6 119215384 119230332 +HSF2 chr6 122720691 122754264 +SERINC1 chr6 122764499 122792967 +FABP7 chr6 123100620 123105219 +SMPDL3A chr6 123110315 123130865 +HDDC2 chr6 125541108 125623282 +NCOA7 chr6 126102307 126252266 +HINT3 chr6 126277927 126301387 +TRMT11 chr6 126307576 126360422 +RNF146 chr6 127587755 127609712 +ECHDC1 chr6 127609855 127664754 +KIAA0408 chr6 127761488 127780536 +PTPRK chr6 128289924 128841870 +EPB41L2 chr6 131160487 131384462 +AKAP7 chr6 131456806 131604675 +ARG1 chr6 131894284 131905472 +MED23 chr6 131895106 131949369 +STX7 chr6 132767006 132834337 +TAAR8 chr6 132873832 132874860 +SLC18B1 chr6 133090509 133119701 +RPS12 chr6 133135580 133138703 +TBPL1 chr6 134273308 134311570 +HMGA1P7 chr6 134436373 134436874 +SGK1 
chr6 134490384 134639250 +ALDH8A1 chr6 135238528 135271260 +HBS1L chr6 135281516 135424194 +AHI1 chr6 135604670 135818914 +BCLAF1 chr6 136578001 136610989 +MAP3K5 chr6 136878185 137113656 +PEX7 chr6 137143717 137235075 +IFNGR1 chr6 137518621 137540586 +TNFAIP3 chr6 138188351 138204449 +KIAA1244 chr6 138483058 138665800 +PBOV1 chr6 138537129 138539627 +NHSL1 chr6 138743180 139013708 +CCDC28A chr6 139094657 139114456 +REPS1 chr6 139224630 139309398 +ABRACL chr6 139349819 139364439 +CITED2 chr6 139693393 139695757 +VTA1 chr6 142468367 142545826 +HIVEP2 chr6 143072604 143266338 +AIG1 chr6 143381633 143661441 +PEX3 chr6 143771944 143811147 +FUCA2 chr6 143815948 143832827 +LTV1 chr6 144164481 144184949 +SF3B5 chr6 144416018 144416754 +FBXO30 chr6 146114638 146135889 +SHPRH chr6 146185381 146285559 +SASH1 chr6 148593440 148873186 +UST chr6 149068464 149398126 +TAB2 chr6 149539777 149732749 +SUMO4 chr6 149721495 149722177 +PPIL4 chr6 149825869 149867174 +GINM1 chr6 149887430 149912884 +KATNA1 chr6 149916009 149970108 +LATS1 chr6 149979289 150039392 +NUP43 chr6 150045451 150070801 +PCMT1 chr6 150070579 150132556 +RAET1G chr6 150238014 150244257 +MTHFD1L chr6 151186685 151423023 +AKAP12 chr6 151561134 151679692 +ZBTB2 chr6 151685252 151712683 +RMND1 chr6 151725989 151773259 +C6orf211 chr6 151773422 151791236 +MTRF1L chr6 153308497 153323820 +RGS17 chr6 153325594 153452384 +SCAF8 chr6 155054459 155155192 +TFB1M chr6 155578643 155635627 +ARID1B chr6 157099063 157531913 +TMEM242 chr6 157710418 157744633 +SERAC1 chr6 158530536 158589312 +TULP4 chr6 158733692 158932860 +TMEM181 chr6 158957468 159056460 +DYNLT1 chr6 159057506 159065771 +EZR chr6 159186773 159240444 +TAGAP chr6 159455500 159466184 +FNDC1 chr6 159590429 159693141 +SOD2 chr6 160090089 160183561 +WTAP chr6 160146617 160177351 +ACAT2 chr6 160181360 160200144 +TCP1 chr6 160199530 160210781 +MRPL18 chr6 160210844 160219468 +IGF2R chr6 160390131 160534539 +LPAL2 chr6 160874460 160932156 +MAP3K4 chr6 161412759 161538417 
+AGPAT4 chr6 161551011 161695093 +PARK2 chr6 161768452 163148803 +PACRG chr6 163148164 163736524 +CAHM chr6 163834097 163834992 +QKI chr6 163835032 163999628 +SFT2D1 chr6 166733216 166756079 +MPC1 chr6 166778407 166796486 +RPS6KA2 chr6 166822852 167319939 +RNASET2 chr6 167342992 167370679 +FGFR1OP chr6 167412670 167466201 +UNC93A chr6 167684657 167729507 +MLLT4 chr6 168227602 168372703 +THBS2 chr6 169615875 169654139 +C6orf120 chr6 170102233 170106401 +LINC00574 chr6 170190417 170202969 +DLL1 chr6 170591294 170599561 +FAM120B chr6 170599791 170716153 +PSMB1 chr6 170844205 170862429 +TBP chr6 170863390 170881958 +PDCD2 chr6 170884383 170893780 +PDGFA chr7 536895 559933 +PRKAR1B chr7 588834 767287 +SUN1 chr7 855528 936072 +GET4 chr7 916189 936073 +COX19 chr7 938415 1015235 +C7orf50 chr7 1036622 1177896 +GPR146 chr7 1084212 1098897 +ZFAND2A chr7 1191707 1200395 +INTS1 chr7 1509913 1545489 +TMEM184A chr7 1581871 1600457 +PSMG3 chr7 1606966 1610641 +MAD1L1 chr7 1855429 2272878 +FTSJ2 chr7 2273866 2281840 +NUDT1 chr7 2281857 2290781 +EIF3B chr7 2393721 2420380 +CHST12 chr7 2443223 2474242 +BRAT1 chr7 2577511 2595361 +TTYH3 chr7 2671585 2704436 +AMZ1 chr7 2719156 2815134 +GNA12 chr7 2767746 2883958 +SDK1 chr7 3341080 4308632 +FOXK1 chr7 4683388 4811074 +AP5Z1 chr7 4815253 4833943 +MMD2 chr7 4945620 4998844 +RNF216P1 chr7 5013619 5080306 +WIPI2 chr7 5229819 5273457 +ACTB chr7 5566782 5603415 +FSCN1 chr7 5632439 5646286 +RNF216 chr7 5659678 5821370 +OCM chr7 5919458 5925993 +CCZ1 chr7 5938356 5965605 +PMS2 chr7 6012870 6048756 +AIMP2 chr7 6048876 6063465 +EIF2AK1 chr7 6061881 6098861 +USP42 chr7 6144515 6201195 +FAM220A chr7 6369040 6388612 +RAC1 chr7 6414154 6443608 +DAGLB chr7 6448757 6523821 +KDELR2 chr7 6485584 6523873 +ZDHHC4 chr7 6617065 6629005 +ZNF12 chr7 6728064 6746554 +PMS2CL chr7 6749759 6793493 +CCZ1B chr7 6833765 6866401 +C1GALT1 chr7 7196565 7288282 +MIOS chr7 7606503 7648560 +RPA3 chr7 7676149 7758238 +GLCCI1 chr7 8008425 8133902 +ICA1 chr7 8152814 8302317 
+NXPH1 chr7 8473585 8792593 +NDUFA4 chr7 10971578 10979883 +PHF14 chr7 11013499 11209250 +THSD7A chr7 11409984 11871824 +TMEM106B chr7 12250867 12282993 +SCIN chr7 12610203 12693228 +ARL4A chr7 12726481 12730559 +ETV1 chr7 13930853 14031050 +DGKB chr7 14184674 15014402 +SOSTDC1 chr7 16501106 16570205 +ANKMY2 chr7 16639401 16685442 +BZW2 chr7 16685756 16746148 +TSPAN13 chr7 16793160 16824161 +SNX13 chr7 17830385 17980124 +HDAC9 chr7 18126572 19042039 +TWISTNB chr7 19735085 19748710 +ITGB8 chr7 20370325 20455377 +SP8 chr7 20823906 20826505 +SP4 chr7 21467652 21554440 +CDCA7L chr7 21940518 21985702 +RAPGEF5 chr7 22157856 22396763 +TOMM7 chr7 22852251 22862470 +FAM126A chr7 22980878 23053749 +KLHL7 chr7 23145353 23217533 +NUPL2 chr7 23221446 23240630 +MALSU1 chr7 23338358 23351348 +TRA2A chr7 23544399 23571660 +CCDC126 chr7 23636998 23684327 +FAM221A chr7 23719749 23742868 +DFNA5 chr7 24737972 24809244 +CYCS chr7 25159710 25164980 +NFE2L3 chr7 26191860 26226745 +HNRNPA2B1 chr7 26229547 26241149 +CBX3 chr7 26240782 26252976 +SKAP2 chr7 26706681 27034858 +HOXA3 chr7 27145803 27192200 +HOXA11 chr7 27221129 27224842 +HIBADH chr7 27565061 27702614 +TAX1BP1 chr7 27778950 27884183 +JAZF1 chr7 27870192 28220362 +CREB5 chr7 28338940 28865511 +TRIL chr7 28992974 28997934 +CPVL chr7 29034847 29235067 +CHN2 chr7 29161890 29553944 +SCRN1 chr7 29959719 30029905 +FKBP14 chr7 30050203 30066300 +C7orf41 chr7 30174426 30202378 +GGCT chr7 30536237 30591095 +GARS chr7 30634297 30673649 +AQP1 chr7 30893010 30963427 +ADCYAP1R1 chr7 31092076 31151089 +LSM5 chr7 32524951 32534895 +AVL9 chr7 32535038 33078516 +ZNRF2P1 chr7 32768181 32768623 +KBTBD2 chr7 32907784 32933743 +RP9P chr7 32956427 32982788 +RP9 chr7 33134409 33149013 +BBS9 chr7 33168856 33645680 +DPY19L1 chr7 34968488 35077883 +HERPUD2 chr7 35672269 35735181 +SEPT7 chr7 35840542 35944917 +EEPD1 chr7 36192758 36341152 +ANLN chr7 36429415 36493400 +AOAH chr7 36552456 36764154 +AOAH-IT1 chr7 36637440 36639726 +ELMO1 chr7 36893961 
37488852 +EPDR1 chr7 37723446 37991543 +STARD3NL chr7 38217824 38270272 +AMPH chr7 38423305 38671167 +VPS41 chr7 38762563 38971994 +YAE1D1 chr7 39605975 39649919 +RALA chr7 39663082 39747723 +LINC00265 chr7 39773231 39832691 +MPLKIP chr7 40165622 40174258 +INHBA-AS1 chr7 41733514 41818986 +C7orf25 chr7 42948325 42951904 +PSMA2 chr7 42956460 42971822 +MRPL32 chr7 42971799 42988557 +STK17A chr7 43622357 43666385 +COA1 chr7 43648055 43769316 +BLVRA chr7 43798279 43846939 +MRPS24 chr7 43906157 43909492 +URGCP chr7 43915493 43966010 +UBE2D4 chr7 43966037 43995735 +SPDYE1 chr7 44040488 44049721 +DBNL chr7 44084239 44109055 +POLM chr7 44111846 44122139 +AEBP1 chr7 44143960 44154161 +POLD2 chr7 44154286 44163957 +YKT6 chr7 44240567 44253893 +NUDCD3 chr7 44418720 44530479 +DDX56 chr7 44605016 44614650 +TMED4 chr7 44617493 44621886 +OGDH chr7 44646171 44748665 +ZMIZ2 chr7 44788180 44809477 +PPIA chr7 44836279 44864163 +H2AFV chr7 44866390 44887682 +PURB chr7 44915896 44924960 +SNHG15 chr7 45022622 45026560 +CCM2 chr7 45039074 45116068 +NACAD chr7 45120037 45128513 +TBRG4 chr7 45139699 45151646 +SEPT7P2 chr7 45763379 45808617 +TNS3 chr7 47314752 47622156 +HUS1 chr7 47735328 48019178 +UPP1 chr7 48128225 48148330 +VWC2 chr7 49813257 49961546 +FIGNL1 chr7 50511831 50518088 +SEC61G chr7 54819943 54827667 +EGFR chr7 55086714 55324313 +LANCL2 chr7 55433141 55501435 +VOPP1 chr7 55503749 55640681 +ZNF713 chr7 55955169 56009918 +GBAS chr7 56019486 56067874 +MRPS17 chr7 56019512 56024192 +PSPH chr7 56078744 56119297 +CCT6A chr7 56119323 56131682 +SUMF2 chr7 56131695 56148363 +PHKG1 chr7 56148440 56160689 +CHCHD2 chr7 56169262 56174269 +ZNF716 chr7 57509883 57533252 +ZNF727 chr7 63505821 63538927 +ZNF736 chr7 63767837 63815238 +ZNF680 chr7 63980262 64023484 +ZNF107 chr7 64126511 64171404 +ZNF138 chr7 64254766 64294054 +ZNF273 chr7 64330550 64391344 +ZNF117 chr7 64432154 64467062 +ERV3-1 chr7 64451187 64467121 +CCT6P3 chr7 64498732 64535091 +ZNF92 chr7 64838712 64866038 +CCT6P1 chr7 
65216129 65228341 +VKORC1L1 chr7 65338254 65424550 +GUSB chr7 65425671 65447301 +ASL chr7 65540785 65558545 +CRCP chr7 65579591 65619555 +TPST1 chr7 65670186 65885530 +LINC00174 chr7 65841031 65866325 +KCTD7 chr7 66093868 66276446 +RABGEF1 chr7 66147151 66276451 +TMEM248 chr7 66386212 66423538 +SBDS chr7 66452664 66460588 +TYW1 chr7 66460160 66704501 +PMS2P4 chr7 66752528 66767894 +STAG3L4 chr7 66767608 66786513 +SBDSP1 chr7 72300004 72307909 +SPDYE7P chr7 72333321 72341076 +POM121 chr7 72349936 72421979 +NSUN5P2 chr7 72418120 72425329 +STAG3L3 chr7 72440227 72476445 +SPDYE8P chr7 72490260 72500313 +NSUN5 chr7 72716514 72722864 +BAZ1B chr7 72854728 72936608 +BCL7B chr7 72950686 72972332 +TBL2 chr7 72983262 72993121 +DNAJC30 chr7 73095299 73097783 +WBSCR22 chr7 73097355 73119491 +ABHD11 chr7 73150424 73153197 +ELN chr7 73442119 73484237 +EIF4H chr7 73588575 73611431 +LAT2 chr7 73613982 73644161 +RFC2 chr7 73645829 73668774 +CLIP2 chr7 73703805 73820273 +GTF2IRD1 chr7 73868120 74016931 +GTF2I chr7 74071994 74175026 +STAG3L2 chr7 74112305 74306731 +GTF2IRD2 chr7 74210483 74267847 +PMS2P5 chr7 74306894 74366314 +WBSCR16 chr7 74441226 74490064 +GTF2IRD2B chr7 74508364 74565623 +GTF2IP1 chr7 74602783 74653438 +STAG3L1 chr7 74988448 75024657 +NSUN5P1 chr7 75039605 75046066 +POM121C chr7 75046066 75115548 +HIP1 chr7 75162621 75368280 +CCL26 chr7 75398851 75419214 +RHBDD2 chr7 75471920 75518244 +POR chr7 75528518 75616173 +TMEM120A chr7 75616155 75623977 +STYXL1 chr7 75625656 75677322 +MDH2 chr7 75677369 75696826 +HSPB1 chr7 75931861 75933612 +YWHAG chr7 75956116 75988348 +DTX2 chr7 76090993 76135312 +POMZP3 chr7 76239303 76256578 +PTPN12 chr7 77166592 77269388 +RSBN1L-AS1 chr7 77286977 77325582 +RSBN1L chr7 77325760 77412339 +TMEM60 chr7 77423045 77427897 +PHTF2 chr7 77428122 77586818 +MAGI2 chr7 77646393 79082890 +MAGI2-AS3 chr7 79082198 79100524 +GNAI1 chr7 79763271 79848718 +CD36 chr7 79998891 80308593 +CACNA2D1 chr7 81575760 82073114 +GRM3 chr7 86273230 86494200 
+KIAA1324L chr7 86506222 86689015 +DMTF1 chr7 86781677 86825653 +TMEM243 chr7 86825478 86849903 +TP53TG1 chr7 86954541 86974831 +CROT chr7 86974997 87029111 +SLC25A40 chr7 87462883 87505672 +DBF4 chr7 87505531 87538856 +ADAM22 chr7 87563458 87832204 +SRI chr7 87834433 87856308 +STEAP2 chr7 89796904 89867451 +GTPBP10 chr7 89964537 90020769 +CLDN12 chr7 90013035 90142716 +MTERF chr7 91321323 91510034 +AKAP9 chr7 91570181 91739987 +CYP51A1 chr7 91741465 91772266 +KRIT1 chr7 91828283 91875480 +ANKIB1 chr7 91875548 92030698 +GATAD1 chr7 92076767 92088150 +PEX1 chr7 92116334 92157845 +RBM48 chr7 92158087 92167319 +FAM133B chr7 92190107 92219708 +CCDC132 chr7 92861653 92988338 +MIR489 chr7 93113248 93113331 +BET1 chr7 93592074 93633694 +CASD1 chr7 94138531 94186331 +SGCE chr7 94214542 94285521 +PEG10 chr7 94285637 94299007 +PPP1R9A chr7 94536514 94925727 +PON2 chr7 95034175 95064510 +PDK4 chr7 95212811 95225803 +DYNC1I1 chr7 95401866 95739634 +SLC25A13 chr7 95749532 95951459 +SHFM1 chr7 96110938 96339203 +ACN9 chr7 96745902 96811075 +ASNS chr7 97481430 97501854 +LMTK2 chr7 97736197 97838945 +TECPR1 chr7 97843936 97881563 +BRI3 chr7 97881691 97937162 +TMEM130 chr7 98444111 98468394 +KPNA7 chr7 98771197 98805129 +ARPC1A chr7 98923521 98985787 +ARPC1B chr7 98971872 98992424 +PDAP1 chr7 98989671 99006452 +BUD31 chr7 99006264 99017239 +PTCD1 chr7 99014362 99063787 +CPSF4 chr7 99036545 99054994 +ATP5J2 chr7 99046098 99063954 +ZNF789 chr7 99070464 99101273 +ZNF394 chr7 99084142 99097947 +ZKSCAN5 chr7 99102274 99132323 +FAM200A chr7 99143931 99156159 +ZNF655 chr7 99156029 99174076 +ZSCAN25 chr7 99214569 99230030 +OR2AE1 chr7 99473610 99474680 +TRIM4 chr7 99474581 99517223 +AZGP1 chr7 99564343 99573780 +ZKSCAN1 chr7 99613204 99639312 +ZSCAN21 chr7 99647390 99662661 +ZNF3 chr7 99661656 99680171 +COPS6 chr7 99686577 99689823 +MCM7 chr7 99690351 99699563 +MIR25 chr7 99691183 99691266 +AP4M1 chr7 99699172 99707968 +TAF6 chr7 99704693 99717464 +CNPY4 chr7 99717236 99723134 +LAMTOR4 
chr7 99746530 99753567 +GAL3ST4 chr7 99756867 99766373 +GPC2 chr7 99767229 99774995 +GATS chr7 99798283 99869855 +PVRIG chr7 99815864 99819113 +PMS2P1 chr7 99918615 99939531 +PILRB chr7 99933737 99965356 +PILRA chr7 99965153 99997719 +ZCWPW1 chr7 99998449 100026615 +MEPCE chr7 100026413 100031741 +PPP1R35 chr7 100032905 100034188 +C7orf61 chr7 100054238 100061894 +TSC22D4 chr7 100060982 100076902 +MOSPD3 chr7 100209725 100213007 +ACTL6B chr7 100240720 100254084 +GNB2 chr7 100271154 100276797 +GIGYF1 chr7 100277130 100287071 +POP7 chr7 100303676 100305118 +SLC12A9 chr7 100424442 100464631 +TRIP6 chr7 100464760 100471076 +SRRT chr7 100472733 100486285 +ACHE chr7 100487615 100494594 +AP1S1 chr7 100797678 100804877 +VGF chr7 100805790 100808874 +PLOD3 chr7 100849258 100861701 +ZNHIT1 chr7 100860949 100867471 +FIS1 chr7 100882739 100895597 +RABL5 chr7 100956975 100965104 +CUX1 chr7 101458959 101927249 +PRKRIP1 chr7 102004319 102067123 +ORAI2 chr7 102073553 102097268 +ALKBH4 chr7 102096685 102105323 +LRWD1 chr7 102105376 102113615 +POLR2J chr7 102113565 102119354 +POLR2J3 chr7 102178365 102213103 +SPDYE2 chr7 102191679 102202755 +RASA4 chr7 102220093 102257204 +UPK3BL chr7 102277472 102283238 +POLR2J2 chr7 102277474 102312076 +FAM185A chr7 102389418 102449672 +ARMC10 chr7 102715328 102740205 +NAPEPLD chr7 102740223 102790007 +DPY19L2P2 chr7 102815580 102920857 +PMPCB chr7 102937869 102969958 +DNAJC2 chr7 102952921 102985320 +PSMC2 chr7 102984701 103009842 +ORC5 chr7 103766788 103848495 +LHFPL3 chr7 103969104 104549001 +LHFPL3-AS1 chr7 104379044 104444554 +SRPK2 chr7 104751151 105039755 +RINT1 chr7 105172532 105208124 +SYPL1 chr7 105730949 105753022 +NAMPT chr7 105888731 105926772 +PIK3CG chr7 106505723 106547590 +HBP1 chr7 106809406 106842974 +COG5 chr7 106842000 107204959 +DUS4L chr7 107203929 107218906 +BCAP29 chr7 107220422 107269615 +CBLL1 chr7 107384142 107402112 +SLC26A3 chr7 107405912 107443670 +DLD chr7 107531415 107572175 +NRCAM chr7 107788068 108097161 +PNPLA8 
chr7 108110866 108210110 +THAP5 chr7 108194987 108210194 +DNAJB9 chr7 108210012 108215294 +IMMP2L chr7 110303110 111202573 +LRRN3 chr7 110731062 110765510 +DOCK4 chr7 111366166 111846466 +ZNF277 chr7 111846643 111983151 +IFRD1 chr7 112063023 112121072 +TMEM168 chr7 112402437 112430647 +C7orf60 chr7 112459202 112579971 +MIR3666 chr7 114293400 114293510 +CAPZA2 chr7 116451124 116562103 +ST7 chr7 116593292 116870157 +ST7-OT4 chr7 116593953 116738860 +CTTNBP2 chr7 117350705 117514193 +NAA38 chr7 117824086 117832878 +KCND2 chr7 119913722 120390385 +ING3 chr7 120590803 120617270 +FAM3C chr7 120988905 121036418 +PTPRZ1 chr7 121513143 121702090 +AASS chr7 121715701 121784334 +RNF148 chr7 122341718 122343021 +NDUFA5 chr7 123177051 123198309 +WASL chr7 123321989 123389121 +SPAM1 chr7 123565286 123611468 +GPR37 chr7 124386051 124405681 +POT1 chr7 124462440 124570037 +ZNF800 chr7 126986844 127071978 +GCC1 chr7 127220672 127233665 +ARF5 chr7 127228399 127231759 +PAX4 chr7 127250346 127255982 +SND1 chr7 127292234 127732661 +LRRC4 chr7 127667124 127672160 +RBM28 chr7 127937738 127983962 +IMPDH1 chr7 128032331 128050306 +HILPDA chr7 128095903 128098472 +METTL2B chr7 128116783 128146656 +FAM71F2 chr7 128312342 128326929 +CALU chr7 128379346 128411861 +CCDC136 chr7 128430811 128462186 +ATP6V1F chr7 128502880 128505898 +TNPO3 chr7 128594948 128695198 +TPI1P2 chr7 128696073 128696824 +AHCYL2 chr7 128864864 129070052 +NRF1 chr7 129251555 129396922 +MIR183 chr7 129414745 129414854 +UBE2H chr7 129470572 129592789 +ZC3HC1 chr7 129658126 129691291 +KLHDC10 chr7 129710350 129775560 +TMEM209 chr7 129804555 129847610 +CEP41 chr7 130033612 130082274 +MEST chr7 130126012 130146133 +COPG2 chr7 130146089 130353598 +MKLN1 chr7 130794855 131181395 +PODXL chr7 131185021 131242976 +CHCHD3 chr7 132469629 132766848 +EXOC4 chr7 132937829 133751342 +SLC35B4 chr7 133974084 134001803 +AKR1B1 chr7 134127102 134144036 +BPGM chr7 134331560 134364565 +CALD1 chr7 134429003 134655479 +C7orf49 chr7 134777115 
134855547 +TMEM140 chr7 134832824 134850967 +WDR91 chr7 134868590 134896316 +CNOT4 chr7 135046547 135194875 +NUP205 chr7 135242667 135333505 +C7orf73 chr7 135347244 135378166 +MTPN chr7 135611509 135662101 +LUZP6 chr7 135612022 135612198 +CHRM2 chr7 136553416 136705002 +PTN chr7 136912088 137028611 +DGKI chr7 137065783 137531838 +CREB3L2 chr7 137559725 137686813 +TRIM24 chr7 138145079 138274738 +TMEM213 chr7 138482695 138522846 +ZC3HAV1 chr7 138728266 138794465 +UBN2 chr7 138915102 138992982 +C7orf55 chr7 139024203 139031065 +LUC7L2 chr7 139026106 139107345 +CLEC2L chr7 139208602 139229730 +HIPK2 chr7 139246316 139477577 +TBXAS1 chr7 139476850 139720125 +SLC37A3 chr7 139993493 140104233 +MKRN1 chr7 140152840 140179369 +NDUFB2 chr7 140390577 140422590 +BRAF chr7 140419127 140624564 +MRPS33 chr7 140705854 140715028 +AGK chr7 141250989 141355044 +KIAA1147 chr7 141356528 141401953 +SSBP1 chr7 141438121 141487722 +TAS2R4 chr7 141478242 141479235 +MTRNR2L6 chr7 142374104 142375550 +PRSS1 chr7 142457319 142460923 +PIP chr7 142829170 142836839 +GSTK1 chr7 142941186 142967947 +CASP2 chr7 142985308 143004789 +FAM131B chr7 143050493 143059863 +ZYX chr7 143078173 143088204 +EPHA1-AS1 chr7 143104906 143220542 +FAM115A chr7 143548468 143599291 +OR2A12 chr7 143792141 143793186 +ARHGEF5 chr7 144052381 144077725 +CNTNAP2 chr7 145813453 148118090 +CUL1 chr7 148395006 148498128 +EZH2 chr7 148504475 148581413 +PDIA4 chr7 148700154 148725733 +ZNF786 chr7 148766735 148787874 +ZNF425 chr7 148799876 148823438 +ZNF282 chr7 148892577 148923339 +ZNF212 chr7 148936742 148952700 +ZNF767 chr7 149244245 149321843 +KRBA1 chr7 149411872 149431664 +ATP6V0E2 chr7 149570057 149577784 +REPIN1 chr7 150065278 150071133 +ZNF775 chr7 150065879 150109558 +GIMAP4 chr7 150264365 150271041 +GIMAP2 chr7 150382785 150390729 +TMEM176B chr7 150488373 150498448 +KCNH2 chr7 150642049 150675403 +ABCB8 chr7 150725510 150744869 +CDK5 chr7 150750899 150755617 +FASTK chr7 150773711 150777953 +TMUB1 chr7 150778167 
150780633 +AGAP3 chr7 150782918 150841523 +GBX1 chr7 150845676 150871832 +ABCF2 chr7 150904923 150924316 +NUB1 chr7 151038785 151075535 +MIR3907 chr7 151130575 151130725 +RHEB chr7 151163098 151217206 +PRKAG2 chr7 151253197 151574210 +GALNT11 chr7 151722759 151819425 +DPP6 chr7 153584182 154685995 +PAXIP1 chr7 154735397 154794794 +INSIG1 chr7 155089486 155101945 +RBM33 chr7 155437145 155574179 +LMBR1 chr7 156461646 156685924 +NOM1 chr7 156742417 156765876 +DNAJB6 chr7 157128075 157210133 +PTPRN2 chr7 157331750 158380480 +ESYT2 chr7 158523686 158622944 +WDR60 chr7 158649269 158749438 +LINC00689 chr7 158799213 158822886 +VIPR2 chr7 158820866 158937649 +RPL23AP53 chr8 163186 182231 +ZNF596 chr8 182137 197342 +FBXO25 chr8 356428 421225 +ERICH1 chr8 564746 688106 +CLN8 chr8 1703944 1734738 +CSMD1 chr8 2792875 4852494 +MCPH1 chr8 6264113 6501144 +AGPAT5 chr8 6565878 6617184 +XKR5 chr8 6666038 6693166 +DEFT1P2 chr8 6863815 6864911 +DEFB103B chr8 7286410 7287870 +DEFB106A chr8 7682694 7686575 +FAM86B3P chr8 8086117 8102387 +ERI1 chr8 8859657 8974256 +TNKS chr8 9413424 9639856 +MSRA chr8 9911778 10286401 +PINX1 chr8 10622473 10697394 +MTMR9 chr8 11141925 11185646 +TDH chr8 11197146 11225961 +NEIL2 chr8 11627148 11644855 +FDFT1 chr8 11653082 11696818 +CTSB chr8 11700033 11726957 +DEFB135 chr8 11839830 11842099 +DEFB109P1 chr8 12250718 12257875 +TUSC3 chr8 15274724 15624158 +MSR1 chr8 15965387 16424999 +ZDHHC2 chr8 17013538 17082308 +CNOT7 chr8 17086737 17104387 +VPS37A chr8 17104080 17159936 +MTMR7 chr8 17155539 17271037 +MTUS1 chr8 17501304 17658426 +FGL1 chr8 17721889 17767874 +PCM1 chr8 17780349 17885478 +ASAH1 chr8 17913934 17942494 +PSD3 chr8 18384811 18942240 +CSGALNACT1 chr8 19261672 19615540 +INTS10 chr8 19674651 19709594 +LPL chr8 19759228 19824769 +SLC18A1 chr8 20002366 20040717 +ATP6V1B2 chr8 20054878 20084330 +XPO7 chr8 21777180 21864096 +FAM160B2 chr8 21946670 21962409 +NUDT18 chr8 21964383 21966932 +REEP4 chr8 21995533 21999464 +BMP1 chr8 22022249 22069839 
+POLR3D chr8 22102617 22112113 +SLC39A14 chr8 22224762 22291642 +PPP3CC chr8 22298332 22398652 +SORBS3 chr8 22402499 22433301 +PDLIM2 chr8 22435792 22455538 +BIN3 chr8 22477931 22526661 +EGR3 chr8 22545172 22550815 +RHOBTB2 chr8 22844930 22877712 +TNFRSF10B chr8 22877646 22926692 +TNFRSF10C chr8 22941868 22974950 +CHMP7 chr8 23101150 23119512 +R3HCC1 chr8 23127633 23153792 +ENTPD4 chr8 23243296 23315208 +SLC25A37 chr8 23386318 23432976 +STC1 chr8 23699428 23712320 +ADAM28 chr8 24151553 24216531 +EBF2 chr8 25699246 25902913 +PPP2R2A chr8 26149007 26230196 +BNIP3L chr8 26240414 26363152 +PNMA2 chr8 26362202 26371608 +DPYSL2 chr8 26371791 26515694 +ADRA1A chr8 26605667 26724790 +STMN4 chr8 27092840 27115937 +TRIM35 chr8 27142404 27168836 +PTK2B chr8 27168999 27316903 +CHRNA2 chr8 27317279 27337400 +CLU chr8 27454434 27472548 +SCARA3 chr8 27491385 27534293 +CCDC25 chr8 27590835 27630170 +NUGGC chr8 27879481 27941388 +ELP3 chr8 27947190 28048673 +FZD3 chr8 28351729 28431775 +EXTL3 chr8 28457986 28613116 +INTS9 chr8 28625178 28747759 +HMBOX1 chr8 28747911 28922281 +KIF13B chr8 28924796 29120641 +TMEM66 chr8 29920528 29940723 +LEPROTL1 chr8 29952914 30034724 +DCTN6 chr8 30013813 30041156 +GTF2E2 chr8 30435835 30515768 +SMIM18 chr8 30496117 30503581 +GSR chr8 30535583 30585443 +UBXN8 chr8 30589764 30624522 +PPP2CB chr8 30631973 30671830 +FUT10 chr8 33228342 33330940 +TTI2 chr8 33330904 33371119 +MAK16 chr8 33342268 33358778 +RNF122 chr8 33405273 33424643 +DUSP26 chr8 33448856 33457624 +ERLIN2 chr8 37594117 37616619 +PROSC chr8 37620111 37637283 +BRF2 chr8 37700786 37707422 +EIF4EBP1 chr8 37887859 37917883 +ASH2L chr8 37962760 38001594 +LSM1 chr8 38020839 38034248 +BAG4 chr8 38034051 38070819 +DDHD2 chr8 38082736 38133076 +PPAPDC1B chr8 38120648 38126761 +WHSC1L1 chr8 38127215 38239790 +FGFR1 chr8 38268656 38326352 +RNF5P1 chr8 38458179 38458718 +TACC1 chr8 38585704 38710546 +TM2D2 chr8 38846327 38854343 +ADAM9 chr8 38854388 38962663 +SFRP1 chr8 41119481 41167016 +GOLGA7 
chr8 41347915 41368499 +AGPAT6 chr8 41434706 41482520 +KAT6A chr8 41786997 41909508 +AP3M2 chr8 42010464 42029191 +PLAT chr8 42032236 42065242 +IKBKB chr8 42128820 42189973 +POLB chr8 42195972 42229326 +VDAC3 chr8 42249142 42263415 +SLC20A2 chr8 42273993 42397069 +THAP1 chr8 42691817 42698468 +RNF170 chr8 42704780 42752433 +MIR4469 chr8 42751340 42751418 +HOOK3 chr8 42752075 42885682 +FNTA chr8 42889337 42940931 +HGSNAT chr8 42995556 43057998 +CEBPD chr8 48649471 48651648 +PRKDC chr8 48685669 48872743 +MCM4 chr8 48872745 48890720 +UBE2V2 chr8 48920960 48977268 +EFCAB1 chr8 49623348 49647870 +PCMTD1 chr8 52730140 52811735 +RB1CC1 chr8 53535016 53658403 +OPRK1 chr8 54138284 54164257 +ATP6V1H chr8 54628117 54756118 +TCEA1 chr8 54879112 54935089 +LYPLA1 chr8 54958938 55014577 +MRPL15 chr8 55047770 55060461 +XKR4 chr8 56014949 56454613 +TMEM68 chr8 56608983 56685966 +TGS1 chr8 56685701 56738007 +RPS20 chr8 56979854 56987069 +CHCHD7 chr8 57124245 57131357 +IMPAD1 chr8 57870492 57906403 +FAM110B chr8 58907068 59116838 +UBXN2B chr8 59323823 59364060 +SDCBP chr8 59465483 59495419 +NSMAF chr8 59496063 59572403 +TOX chr8 59717977 60031767 +CA8 chr8 61099906 61193971 +RAB2A chr8 61429416 61536186 +CHD7 chr8 61591337 61779465 +CLVS1 chr8 61969717 62414204 +ASPH chr8 62413116 62627155 +GGH chr8 63927638 63951730 +YTHDF3 chr8 64081112 64125346 +CYP7B1 chr8 65500320 65711318 +ARMC1 chr8 66514694 66546442 +RRS1 chr8 67341263 67342966 +ADHFE1 chr8 67342420 67383836 +C8orf46 chr8 67372238 67430759 +MYBL1 chr8 67474410 67526482 +VCPIP1 chr8 67540722 67579452 +SGK3 chr8 67624653 67774257 +SNHG6 chr8 67833919 67838633 +PPP1R42 chr8 67876334 67968839 +COPS5 chr8 67955314 67996018 +CSPP1 chr8 67974661 68108498 +ARFGEF1 chr8 68085747 68255912 +PREX2 chr8 68864353 69149265 +NCOA2 chr8 71021997 71316040 +TRAM1 chr8 71485677 71520622 +XKR9 chr8 71581600 71702606 +TERF1 chr8 73921099 73960357 +RPL7 chr8 74202506 74208024 +STAU2 chr8 74332604 74659943 +UBE2W chr8 74692332 74791145 +TCEB1 chr8 
74851404 74884522 +TMEM70 chr8 74884672 74895018 +LY96 chr8 74903587 74941322 +GDAP1 chr8 75233365 75401107 +MIR5681A chr8 75460778 75460852 +ZFHX4 chr8 77593454 77779521 +PEX2 chr8 77892494 77913280 +PKIA chr8 79428374 79517502 +ZC2HC1A chr8 79578282 79632000 +STMN2 chr8 80523049 80578410 +HEY1 chr8 80676245 80680098 +MRPS28 chr8 80830952 80942524 +TPD52 chr8 80870571 81143467 +ZNF704 chr8 81540686 81787016 +PAG1 chr8 81880045 82024303 +FABP5 chr8 82192598 82197012 +PMP2 chr8 82352561 82359758 +FABP9 chr8 82370576 82373814 +IMPA1 chr8 82570196 82598928 +ZFAND1 chr8 82613569 82645138 +SNX16 chr8 82711816 82755101 +LRRCC1 chr8 86019382 86058311 +E2F5 chr8 86089460 86129387 +C8orf59 chr8 86126311 86132650 +CA2 chr8 86376081 86393722 +REXO1L1 chr8 86568695 86575726 +SLC7A13 chr8 87226281 87333375 +WWP1 chr8 87354967 87490649 +RMDN1 chr8 87480486 87526586 +CPNE3 chr8 87497059 87573726 +MMP16 chr8 89044237 89340254 +RIPK2 chr8 90769975 90803291 +NBN chr8 90945564 91015456 +DECR1 chr8 91013633 91064320 +TMEM55A chr8 92006024 92053292 +OTUD6B chr8 92082424 92099323 +SLC26A7 chr8 92221722 92410378 +RUNX1T1 chr8 92967203 93115514 +TRIQK chr8 93895758 94029901 +FAM92A1 chr8 94710789 94743755 +RBM12B chr8 94741584 94753245 +RBM12B-AS1 chr8 94752349 94753001 +PDP1 chr8 94870035 94938294 +MIR378D2 chr8 94927962 94928910 +KIAA1429 chr8 95499921 95565757 +DPY19L4 chr8 95731931 95806064 +INTS8 chr8 95825539 95893974 +CCNE2 chr8 95891998 95908906 +NDUFAF6 chr8 95907995 96128683 +TP53INP1 chr8 95938200 95961639 +PLEKHF2 chr8 96146032 96168912 +UQCRB chr8 97238148 97247862 +MTERFD1 chr8 97251626 97273838 +PTDSS1 chr8 97273943 97349223 +CPQ chr8 97657455 98161882 +MTDH chr8 98656407 98740998 +LAPTM4B chr8 98787285 98865241 +MATN2 chr8 98881068 99048944 +RPL30 chr8 99037079 99058697 +HRSP12 chr8 99114572 99129469 +STK3 chr8 99413631 99955055 +VPS13B chr8 100025494 100889808 +COX6C chr8 100885428 100906290 +POLR2K chr8 101162812 101166230 +SPAG1 chr8 101170134 101271506 +RNF19A chr8 
101269288 101348446 +ANKRD46 chr8 101521980 101572012 +PABPC1 chr8 101698044 101735037 +YWHAZ chr8 101928753 101965616 +ZNF706 chr8 102190106 102218421 +NCALD chr8 102698771 103137135 +RRM2B chr8 103216730 103251346 +UBR5 chr8 103265240 103425069 +KLF10 chr8 103661007 103668130 +AZIN1 chr8 103838585 103906092 +ATP6V1C1 chr8 104033291 104085279 +BAALC chr8 104152938 104242533 +MIR3151 chr8 104166842 104166917 +SLC25A32 chr8 104410863 104427417 +DCAF13 chr8 104426942 104455681 +LRP12 chr8 105501459 105601252 +ZFPM2 chr8 106330920 106816760 +OXR1 chr8 107282473 107764922 +EIF3E chr8 109213445 109447562 +EMC2 chr8 109455830 109499145 +NUDCD1 chr8 110253148 110346614 +ENY2 chr8 110346553 110358182 +EBAG9 chr8 110551940 110578225 +CSMD3 chr8 113235157 114449328 +TRPS1 chr8 116420724 116821899 +EIF3H chr8 117654369 117779164 +UTP23 chr8 117778742 117861702 +RAD21 chr8 117858174 117887105 +MED30 chr8 118532952 118552501 +COLEC10 chr8 120007691 120118821 +ENPP2 chr8 120569326 120685693 +TAF2 chr8 120743015 120845103 +DSCC1 chr8 120846216 120868250 +DEPTOR chr8 120885957 121063152 +MRPL13 chr8 121393000 121457642 +SNTB1 chr8 121547985 121825513 +ZHX2 chr8 123793633 123986750 +DERL1 chr8 124025404 124054663 +C8orf76 chr8 124232196 124253638 +ZHX1 chr8 124260696 124287781 +ATAD2 chr8 124332090 124428590 +WDYHV1 chr8 124428965 124479470 +FAM91A1 chr8 124780696 124827692 +TRMT12 chr8 125463048 125474391 +RNF139 chr8 125486979 125500155 +TATDN1 chr8 125500726 125551699 +NDUFB9 chr8 125551344 125580751 +MTSS1 chr8 125563031 125740730 +SQLE chr8 126010739 126034525 +KIAA0196 chr8 126036502 126104082 +NSMCE2 chr8 126103921 126379362 +TRIB1 chr8 126442563 126450647 +FAM84B chr8 127564687 127570638 +MYC chr8 128747680 128753674 +FAM49B chr8 130851839 131029375 +MIR5194 chr8 131020580 131020699 +ASAP1 chr8 131064353 131455906 +EFR3A chr8 132916335 133025889 +PHF20L1 chr8 133787618 133861052 +SLA chr8 134048973 134115298 +NDRG1 chr8 134249414 134314265 +KHDRBS3 chr8 136469700 136668965 
+TRAPPC9 chr8 140742586 141468678 +CHRAC1 chr8 141521397 141527236 +PTK2 chr8 141667999 142012315 +DENND3 chr8 142127377 142205907 +GPR20 chr8 142366600 142377367 +TSNARE1 chr8 143293441 143484601 +ARC chr8 143692405 143696833 +JRK chr8 143738874 143763386 +LY6K chr8 143781529 143786545 +THEM6 chr8 143808621 143818345 +C8orf31 chr8 144120626 144141359 +ZNF696 chr8 144371846 144380231 +ZC3H3 chr8 144519825 144623623 +NAPRT1 chr8 144656955 144660819 +EEF1D chr8 144661867 144681711 +PYCRL chr8 144686083 144691943 +TSTA3 chr8 144694788 144700218 +ZNF623 chr8 144718183 144735900 +CCDC166 chr8 144788864 144790279 +PUF60 chr8 144898514 144912029 +NRBP2 chr8 144915764 144924200 +PLEC chr8 144989321 145050902 +MIR661 chr8 145019359 145019447 +GRINA chr8 145064226 145067583 +EXOSC4 chr8 145133529 145135550 +GPAA1 chr8 145137493 145141119 +CYC1 chr8 145149930 145152428 +SHARPIN chr8 145153536 145163027 +MAF1 chr8 145159402 145162514 +KIAA1875 chr8 145162629 145173218 +BOP1 chr8 145486055 145515082 +HSF1 chr8 145515280 145538385 +SLC52A2 chr8 145577795 145584932 +ADCK5 chr8 145596790 145618457 +CPSF1 chr8 145618444 145634753 +VPS28 chr8 145649000 145653931 +CYHR1 chr8 145674965 145691060 +KIFC2 chr8 145691426 145699585 +PPP1R16A chr8 145703352 145727504 +MFSD3 chr8 145734457 145736596 +LRRC14 chr8 145743376 145750557 +ZNF251 chr8 145946298 145981802 +ZNF34 chr8 145998499 146012730 +RPL8 chr8 146015150 146017972 +ZNF7 chr8 146052849 146072894 +COMMD5 chr8 146066427 146079121 +ZNF250 chr8 146076632 146127553 +ZNF16 chr8 146155744 146176274 +ZNF252P chr8 146198975 146228281 +ZNF252P-AS1 chr8 146228197 146231432 +C8orf33 chr8 146277764 146281416 +CBWD1 chr9 121041 188979 +DOCK8 chr9 214854 465259 +KANK1 chr9 470291 746105 +DMRT3 chr9 976964 991731 +SMARCA2 chr9 2015342 2193624 +VLDLR chr9 2621834 2660053 +KIAA0020 chr9 2720469 2844241 +RFX3 chr9 3218297 3526004 +SPATA6L chr9 4553386 4666674 +PPAPDC2 chr9 4662298 4665256 +CDC37L1 chr9 4679559 4708398 +AK3 chr9 4711155 4742043 +RCL1 
chr9 4792869 4885917 +JAK2 chr9 4985033 5128183 +RLN1 chr9 5334969 5339873 +PLGRKT chr9 5357973 5437878 +KIAA2026 chr9 5881596 6007901 +RANBP6 chr9 6011043 6015618 +IL33 chr9 6215805 6257983 +UHRF2 chr9 6413151 6507054 +KDM4C chr9 6720863 7175648 +PTPRD chr9 8314246 10612723 +MPDZ chr9 13105703 13279589 +NFIB chr9 14081842 14398982 +ZDHHC21 chr9 14611069 14693469 +SNAPC3 chr9 15422702 15465951 +PSIP1 chr9 15464064 15511017 +CNTLN chr9 17134980 17503921 +RRAGA chr9 19049372 19051019 +HAUS6 chr9 19053141 19103117 +PLIN2 chr9 19108373 19149288 +DENND4C chr9 19230433 19374139 +RPS6 chr9 19375713 19380252 +SLC24A2 chr9 19507450 19786926 +MLLT3 chr9 20341663 20622542 +FOCAD chr9 20658308 20995954 +IFNW1 chr9 21140631 21142144 +KLHL9 chr9 21329670 21335379 +MTAP chr9 21802542 21931646 +C9orf53 chr9 21967137 21967738 +CDKN2A chr9 21967751 21995300 +CAAP1 chr9 26840683 26892802 +PLAA chr9 26904081 26947461 +IFT74 chr9 26947037 27062928 +MOB3B chr9 27325207 27529779 +C9orf72 chr9 27546544 27573864 +MIR873 chr9 28888877 28888953 +ACO1 chr9 32384618 32454767 +DDX58 chr9 32455300 32526322 +TOPORS chr9 32540542 32552551 +NDUFB6 chr9 32552997 32573160 +APTX chr9 32972604 33025166 +DNAJA1 chr9 33025209 33039905 +SMU1 chr9 33041762 33076665 +B4GALT1 chr9 33104080 33167354 +BAG1 chr9 33247818 33264761 +CHMP5 chr9 33264940 33281977 +NFX1 chr9 33290509 33371155 +NOL6 chr9 33461439 33473928 +PTENP1 chr9 33673502 33677497 +UBE2R2 chr9 33817565 33920402 +UBAP2 chr9 33921691 34048947 +DCAF12 chr9 34086385 34127397 +UBAP1 chr9 34179003 34252521 +NUDT2 chr9 34329504 34343709 +KIAA1161 chr9 34366668 34376851 +FAM219A chr9 34398182 34458568 +RPP25L chr9 34610483 34612101 +DCTN3 chr9 34613548 34620515 +SIGMAR1 chr9 34634719 34637806 +GALT chr9 34646642 34657110 +IL11RA chr9 34650699 34661889 +DNAJB5 chr9 34989638 34998897 +VCP chr9 35056061 35073246 +FANCG chr9 35073832 35080013 +PIGO chr9 35088685 35096591 +STOML2 chr9 35099888 35103154 +RUSC2 chr9 35490124 35561895 +TESK1 chr9 35605367 
35610038 +MIR4667 chr9 35608091 35608156 +CCDC107 chr9 35658301 35661508 +TLN1 chr9 35696945 35732392 +CREB3 chr9 35732332 35737001 +GBA2 chr9 35736863 35749983 +RGP1 chr9 35749203 35758572 +HINT2 chr9 35812957 35815351 +TMEM8B chr9 35814448 35854844 +OR2S2 chr9 35957105 35958151 +RECK chr9 36036430 36124448 +GLIPR2 chr9 36136732 36163910 +CLTA chr9 36190853 36304778 +GNE chr9 36214438 36277053 +RNF38 chr9 36336393 36487545 +MIR4540 chr9 36864251 36864305 +ZCCHC7 chr9 37120536 37358146 +GRHPR chr9 37422663 37436987 +ZBTB5 chr9 37438111 37465396 +POLR1E chr9 37485932 37503694 +FBXO10 chr9 37510889 37588871 +TOMM5 chr9 37582643 37592639 +TRMT10B chr9 37753804 37778969 +EXOSC3 chr9 37766975 37801434 +DCAF10 chr9 37800499 37867663 +SLC25A51 chr9 37879400 37904350 +ALDH1B1 chr9 38392661 38398658 +IGFBPL1 chr9 38408991 38424444 +SPATA31A2 chr9 39884975 39891210 +FAM95B1 chr9 42466317 42474239 +AQP7P1 chr9 67272038 67289492 +PGM5P2 chr9 69080240 69147854 +CBWD6 chr9 69204538 69269662 +CBWD5 chr9 70432004 70497240 +CBWD3 chr9 70856397 70914929 +FAM122A chr9 71394964 71398609 +FXN chr9 71650175 71715094 +FAM189A2 chr9 71939488 72007371 +APBA1 chr9 72042446 72287222 +SMC5 chr9 72873937 72969804 +C9orf85 chr9 74526426 74600970 +ZFAND5 chr9 74966341 74980163 +C9orf41 chr9 77595936 77643339 +NMRK1 chr9 77675489 77703133 +OSTF1 chr9 77703459 77762181 +RFK chr9 79000433 79009433 +PRUNE2 chr9 79226292 79521003 +VPS13A-AS1 chr9 79791672 79792910 +GNAQ chr9 80331003 80646374 +CEP78 chr9 80850978 80894606 +PSAT1 chr9 80912059 80945009 +SPATA31D3 chr9 84558474 84565009 +IDNK chr9 86237964 86259045 +UBQLN1 chr9 86274878 86323118 +HNRNPK chr9 86582998 86595569 +MIR7-1 chr9 86584663 86584772 +RMI1 chr9 86595626 86618985 +NTRK2 chr9 87283466 87638505 +AGTPBP1 chr9 88161455 88356944 +NAA35 chr9 88556061 88637213 +GOLM1 chr9 88641061 88715088 +ISCA1 chr9 88879461 88897676 +ZCCHC6 chr9 88902648 88969369 +GAS1 chr9 89559279 89562104 +CDK20 chr9 90581356 90589668 +SPIN1 chr9 91003334 91093609 
+MIR4289 chr9 91360751 91360820 +SHC3 chr9 91628060 91793682 +CKS2 chr9 91926113 91931618 +SECISBP2 chr9 91933421 91974557 +SEMA4D chr9 91975702 92113045 +GADD45G chr9 92219928 92221470 +DIRAS2 chr9 93372114 93405386 +AUH chr9 93976097 94124195 +NFIL3 chr9 94171327 94186144 +SPTLC1 chr9 94794281 94877666 +IARS chr9 94972489 95056038 +NOL8 chr9 95059640 95087918 +IPPK chr9 95375466 95432547 +BICD2 chr9 95473645 95527094 +ZNF484 chr9 95607874 95640304 +FGD3 chr9 95709733 95798518 +SUSD3 chr9 95820989 95847420 +NINJ1 chr9 95883771 95896570 +FAM120AOS chr9 96208776 96215874 +FAM120A chr9 96214004 96328397 +PHF2 chr9 96338689 96441869 +MIR4291 chr9 96581639 96581703 +HIATL1 chr9 97136833 97223324 +FBP1 chr9 97365415 97402531 +C9orf3 chr9 97488983 97849441 +PTCH1 chr9 98205262 98279339 +LINC00476 chr9 98521513 98638259 +ERCC6L2 chr9 98637983 98776842 +LINC00092 chr9 98782014 98790247 +CDC14B chr9 99252523 99382112 +ZNF510 chr9 99518147 99540411 +TDRD7 chr9 100174232 100258407 +TMOD1 chr9 100263462 100364030 +TSTD2 chr9 100362362 100395962 +NCBP1 chr9 100395908 100436030 +XPA chr9 100437191 100459639 +C9orf156 chr9 100666771 100684852 +ANP32B chr9 100745643 100778225 +NANS chr9 100819021 100845357 +TRIM14 chr9 100831557 100881494 +TBC1D2 chr9 100961311 101017915 +TGFBR1 chr9 101866320 101916474 +ALG2 chr9 101978708 101984238 +SEC61B chr9 101984346 101992897 +NR4A3 chr9 102584137 102629173 +STX17 chr9 102668915 102732618 +ERP44 chr9 102741461 102861322 +INVS chr9 102861538 103063282 +TEX10 chr9 103064359 103115221 +MSANTD3 chr9 103189438 103213511 +MSANTD3-TMEFF1 chr9 103204553 103339918 +TMEFF1 chr9 103204560 103339918 +LPPR1 chr9 103790991 104087417 +BAAT chr9 104122699 104145801 +MRPL50 chr9 104149915 104160896 +ZNF189 chr9 104161155 104172942 +TMEM246 chr9 104235453 104295819 +RNF20 chr9 104296133 104325622 +SMC2 chr9 106856541 106903698 +OR13F1 chr9 107266455 107267547 +NIPSNAP3A chr9 107509969 107522403 +ABCA1 chr9 107543283 107690518 +SLC44A1 chr9 108006903 
108201452 +FSD1L chr9 108210077 108314714 +FKTN chr9 108320411 108403399 +TMEM38B chr9 108456825 108538893 +ZNF462 chr9 109625378 109775915 +KLF4 chr9 110247133 110252763 +ACTL7A chr9 111624603 111626035 +IKBKAP chr9 111629797 111696396 +FAM206A chr9 111696461 111713024 +CTNNAL1 chr9 111704851 111775809 +TMEM245 chr9 111777432 111882225 +PALM2-AKAP2 chr9 112542589 112934792 +TXN chr9 113006091 113018920 +LPAR1 chr9 113635543 113800981 +KIAA0368 chr9 114122972 114247025 +ZNF483 chr9 114287439 114340124 +GNG10 chr9 114423615 114432526 +UGCG chr9 114659046 114697649 +PTBP3 chr9 114980715 115095947 +HSDL2 chr9 115142217 115234690 +KIAA1958 chr9 115249127 115431677 +INIP chr9 115446206 115480516 +SNX30 chr9 115513118 115643951 +ZNF883 chr9 115759495 115774507 +ZFP37 chr9 115800660 115819039 +SLC31A2 chr9 115913222 115926417 +FKBP15 chr9 115923286 115983641 +SLC31A1 chr9 115983808 116028674 +CDC26 chr9 116018115 116037869 +PRPF4 chr9 116037623 116055185 +ALAD chr9 116148597 116163613 +POLE3 chr9 116169515 116172952 +AKNA chr9 117096436 117156685 +ATP6V1G1 chr9 117350026 117360653 +ASTN2 chr9 119187504 120177348 +TRIM32 chr9 119449581 119463579 +CDK5RAP2 chr9 123151147 123342448 +MEGF9 chr9 123363091 123476748 +FBXW2 chr9 123514256 123555690 +PSMD5 chr9 123577774 123605262 +PHF19 chr9 123617977 123639606 +TRAF1 chr9 123664671 123691451 +RAB14 chr9 123940415 123985292 +GSN chr9 123970072 124095121 +STOM chr9 124101355 124132531 +GGTA1P chr9 124207269 124262306 +NDUFA8 chr9 124894745 124922098 +RBM18 chr9 124999903 125027118 +MRRF chr9 125026882 125085743 +OR1N2 chr9 125315391 125316493 +PDCL chr9 125560668 125590910 +OR1K1 chr9 125562370 125563395 +RC3H2 chr9 125606835 125667620 +ZBTB6 chr9 125670335 125675609 +ZBTB26 chr9 125677845 125693779 +RABGAP1 chr9 125703112 125867145 +DENND1A chr9 126141933 126692431 +NEK6 chr9 127019885 127115586 +PSMB7 chr9 127115745 127177723 +MIR181A2HG chr9 127420746 127460910 +MIR181B2 chr9 127455989 127456077 +RPL35 chr9 127620159 127624260 
+ARPC5L chr9 127624409 127640003 +GOLGA1 chr9 127640646 127710771 +SCAI chr9 127704887 127905785 +PPP6C chr9 127908852 127952218 +RABEPK chr9 127962821 127996437 +HSPA5 chr9 127997132 128003609 +GAPVD1 chr9 128024073 128129486 +MAPKAP1 chr9 128199672 128469513 +MVB12B chr9 129089128 129269320 +ZBTB43 chr9 129567285 129600489 +RALGPS1 chr9 129677053 129985445 +ANGPTL2 chr9 129849611 129885162 +GARNL3 chr9 129986544 130155939 +SLC2A8 chr9 130159421 130170703 +ZNF79 chr9 130186661 130207651 +RPL12 chr9 130209953 130213684 +LRSAM1 chr9 130213765 130265780 +STXBP1 chr9 130374544 130457460 +PTRH1 chr9 130455257 130487152 +C9orf117 chr9 130469268 130478281 +TOR2A chr9 130493803 130497604 +CDK9 chr9 130547958 130553066 +FPGS chr9 130556876 130576606 +ENG chr9 130577291 130617035 +AK1 chr9 130628759 130640022 +ST6GALNAC6 chr9 130647600 130667687 +ST6GALNAC4 chr9 130670165 130679317 +DPM2 chr9 130697378 130700763 +FAM102A chr9 130702858 130742792 +SLC25A25 chr9 130830480 130871524 +PTGES2 chr9 130882972 130890741 +C9orf16 chr9 130922539 130926207 +CIZ1 chr9 130928343 130966662 +GOLGA2 chr9 131018108 131038274 +SWI5 chr9 131037658 131051269 +TRUB2 chr9 131071382 131085021 +COQ4 chr9 131084815 131096351 +URM1 chr9 131133598 131153015 +MIR219-2 chr9 131154291 131155789 +CERCAM chr9 131174030 131199626 +ODF2 chr9 131217465 131263571 +GLE1 chr9 131266979 131304567 +SPTAN1 chr9 131314866 131395941 +WDR34 chr9 131395940 131419066 +SET chr9 131445703 131458679 +ZER1 chr9 131492065 131534693 +TBC1D13 chr9 131549483 131572711 +C9orf114 chr9 131581930 131592100 +CCBL1 chr9 131595221 131644773 +LRRC8A chr9 131644391 131680318 +PHYHD1 chr9 131683174 131704320 +DOLK chr9 131707809 131709898 +NUP188 chr9 131709978 131769375 +FAM73B chr9 131798900 131834361 +DOLPP1 chr9 131843379 131852717 +CRAT chr9 131857089 131873468 +PPP2R4 chr9 131873229 131911225 +NTMT1 chr9 132371163 132398209 +ASB6 chr9 132399171 132404444 +TOR1B chr9 132565432 132573560 +TOR1A chr9 132575223 132586413 +C9orf78 chr9 
132589569 132598142 +USP20 chr9 132596977 132644107 +FNBP1 chr9 132649466 132805473 +GPR107 chr9 132815705 132902448 +FUBP3 chr9 133454352 133513739 +EXOSC2 chr9 133569108 133580248 +ABL1 chr9 133589333 133763062 +AIF1L chr9 133971863 133998539 +NUP214 chr9 134000948 134110057 +PPAPDC3 chr9 134165081 134184649 +PRRC2B chr9 134269480 134375584 +SNORD62A chr9 134361052 134361137 +POMT1 chr9 134378289 134399193 +UCK1 chr9 134399188 134406655 +MED27 chr9 134735494 134955295 +SETX chr9 135136743 135230372 +TTF1 chr9 135251008 135282209 +BARHL1 chr9 135457572 135465653 +C9orf9 chr9 135753414 135765588 +TSC1 chr9 135766735 135820020 +GTF3C5 chr9 135906076 135933890 +RALGDS chr9 135973107 136039301 +GBGT1 chr9 136028340 136039332 +SURF6 chr9 136197552 136203235 +MED22 chr9 136205160 136214986 +RPL7A chr9 136215069 136218281 +SNORD36C chr9 136217702 136217767 +SURF1 chr9 136218610 136223552 +SURF2 chr9 136223428 136228045 +SURF4 chr9 136228325 136242970 +REXO4 chr9 136271186 136283164 +ADAMTS13 chr9 136279478 136324508 +CACFD1 chr9 136325089 136335970 +SLC2A6 chr9 136336217 136344259 +ADAMTSL2 chr9 136397286 136440641 +VAV2 chr9 136627016 136857726 +LINC00094 chr9 136890561 136899788 +BRD3 chr9 136895427 136933657 +WDR5 chr9 137000487 137025093 +MIR4669 chr9 137271257 137271318 +FCN1 chr9 137801431 137809809 +OLFM1 chr9 137967268 138013025 +PPP1R26 chr9 138370925 138380739 +MRPS2 chr9 138391830 138396519 +LCN9 chr9 138555168 138558268 +CAMSAP1 chr9 138700333 138799074 +UBAC1 chr9 138824815 138853226 +NACC2 chr9 138898383 138987131 +GPSM1 chr9 139221932 139254057 +SDCCAG3 chr9 139296377 139305061 +PMPCA chr9 139305110 139318213 +SEC16A chr9 139334549 139372141 +NOTCH1 chr9 139388896 139440314 +MIR4673 chr9 139414020 139414078 +SNHG7 chr9 139615818 139622636 +LCN15 chr9 139654086 139660707 +TMEM141 chr9 139685807 139687709 +PHPT1 chr9 139743176 139745488 +EDF1 chr9 139756571 139760738 +TRAF2 chr9 139776364 139821059 +FBXW5 chr9 139834887 139839148 +PTGDS chr9 139871956 
139879887 +C9orf142 chr9 139886870 139888436 +ABCA2 chr9 139901686 139923367 +C9orf139 chr9 139921916 139931234 +NPDC1 chr9 139933922 139940655 +SAPCD2 chr9 139956581 139965040 +UAP1L1 chr9 139971953 139978991 +MAN1B1 chr9 139981379 140003635 +DPP7 chr9 140004994 140009629 +MIR3621 chr9 140063638 140063722 +ANAPC2 chr9 140069236 140082989 +SSNA1 chr9 140083099 140084822 +TMEM203 chr9 140098534 140100090 +TUBB4B chr9 140135665 140138159 +EXD3 chr9 140201348 140317714 +PNPLA7 chr9 140354404 140444986 +MRPL41 chr9 140445651 140447007 +ARRDC1 chr9 140500106 140509812 +C9orf37 chr9 140509784 140513358 +EHMT1 chr9 140513444 140764468 +TUBB8 chr10 92828 120103 +ZMYND11 chr10 180405 300577 +DIP2C chr10 320130 735683 +PRR26 chr10 695888 711109 +LARP4B chr10 855484 977564 +GTPBP4 chr10 1034338 1065876 +IDI2-AS1 chr10 1068606 1090138 +IDI1 chr10 1085848 1095110 +WDR37 chr10 1095478 1178237 +ADARB2 chr10 1228073 1779670 +PFKP chr10 3108525 3179904 +PITRM1 chr10 3179920 3215003 +KLF6 chr10 3818188 3827473 +LINC00704 chr10 4692377 4720346 +AKR1C3 chr10 5077546 5149878 +TUBAL3 chr10 5435061 5446793 +NET1 chr10 5454514 5500426 +ASB13 chr10 5680830 5708558 +FAM208B chr10 5726801 5805703 +GDI2 chr10 5807186 5884095 +FBXO18 chr10 5931535 5979556 +IL2RA chr10 6052652 6104288 +RBM17 chr10 6130950 6159420 +PFKFB3 chr10 6186881 6277495 +KIN chr10 7792925 7829990 +ATP5C1 chr10 7830092 7849778 +TAF3 chr10 7860467 8058590 +CELF2 chr10 11047259 11378674 +ECHDC3 chr10 11784365 11806069 +DHTKD1 chr10 12110971 12165224 +SEC61A2 chr10 12171636 12211960 +NUDT5 chr10 12207324 12238143 +CDC123 chr10 12237964 12292588 +CAMK1D chr10 12391481 12877545 +MIR548Q chr10 12767253 12767352 +OPTN chr10 13141449 13180291 +SEPHS1 chr10 13359424 13390297 +PRPF18 chr10 13628927 13672868 +FRMD4A chr10 13685706 14504141 +MIR1265 chr10 14478575 14478660 +FAM107B chr10 14560556 14816896 +HSPA14 chr10 14880163 14913740 +SUV39H2 chr10 14920819 14946314 +DCLRE1C chr10 14939358 14996431 +ACBD7 chr10 15119522 15130775 
+RPP38 chr10 15139179 15181817 +NMT2 chr10 15144583 15210692 +FAM171A1 chr10 15253642 15413061 +FAM188A chr10 15820169 15902519 +RSU1 chr10 16632610 16859527 +TRDMT1 chr10 17184253 17244053 +STAM chr10 17686124 17757913 +SLC39A12 chr10 18240768 18332221 +CACNB2 chr10 18429606 18830798 +NSUN6 chr10 18834490 18940551 +ARL5B chr10 18948334 18970568 +PLXDC2 chr10 20105168 20578785 +NEBL chr10 21068902 21463116 +SKIDA1 chr10 21802407 21814611 +DNAJC1 chr10 22045466 22292698 +COMMD3 chr10 22604903 22609235 +COMMD3-BMI1 chr10 22605317 22618471 +PIP4K2A chr10 22823778 23003484 +MSRB2 chr10 23384435 23410942 +OTUD1 chr10 23728198 23731308 +ARHGAP21 chr10 24872538 25012597 +PRTFDC1 chr10 25137536 25241533 +THNSL1 chr10 25305587 25315593 +GPR158-AS1 chr10 25447001 25465205 +GPR158 chr10 25463991 25891155 +APBB1IP chr10 26727132 26856732 +PDSS1 chr10 26986588 27035727 +ABI1 chr10 27035522 27150016 +LINC00202-1 chr10 27220135 27230930 +ANKRD26 chr10 27280843 27389421 +YME1L1 chr10 27399383 27444195 +ACBD5 chr10 27484146 27531059 +RAB18 chr10 27793197 27831143 +MPP7 chr10 28339922 28623415 +WAC-AS1 chr10 28811581 28821672 +WAC chr10 28821422 28912041 +PTCHD3P1 chr10 29698331 29776674 +KIAA1462 chr10 30301729 30404423 +MTPAP chr10 30598730 30663377 +MAP3K8 chr10 30722866 30750762 +ZNF438 chr10 31109136 31320866 +ZEB1 chr10 31607424 31818742 +ARHGAP12 chr10 32094365 32217742 +KIF5B chr10 32297938 32345359 +EPC1 chr10 32556679 32667726 +CCDC7 chr10 32735068 32863492 +ITGB1 chr10 33189247 33294720 +NRP1 chr10 33466420 33625190 +CUL2 chr10 35297479 35379570 +CREM chr10 35415719 35501886 +CCNY chr10 35535953 35860852 +FZD8 chr10 35927177 35930362 +ZNF248 chr10 38091751 38147034 +ZNF25 chr10 38238500 38265561 +ZNF33A chr10 38299578 38354016 +ZNF37A chr10 38383264 38414472 +HSD17B7P2 chr10 38645305 38667433 +ACTR3BP5 chr10 38989555 38991356 +LINC00839 chr10 42970991 42990784 +ZNF37BP chr10 43008958 43048270 +ZNF33B chr10 43084555 43133992 +BMS1 chr10 43278249 43330385 +CSGALNACT2 chr10 
43633934 43680756 +HNRNPF chr10 43881065 43904614 +ZNF239 chr10 44051792 44070066 +ZNF485 chr10 44101855 44113351 +ZNF32 chr10 44139307 44144304 +LINC00841 chr10 44434451 44439914 +CXCL12 chr10 44793038 44881941 +RASSF4 chr10 45454855 45491339 +ZNF22 chr10 45495923 45500774 +MIR3156-1 chr10 45659462 45659536 +ALOX5 chr10 45869661 45941561 +MARCH8 chr10 45950035 46090354 +FAM21C chr10 46222648 46288409 +AGAP4 chr10 46321042 46349323 +BMS1P1 chr10 46737612 46761205 +GLUD1P7 chr10 46772794 46778894 +FAM35BP chr10 46897592 46939143 +BMS1P2 chr10 47228366 47241942 +FAM21B chr10 47894023 47949412 +BMS1P6 chr10 48187416 48199256 +AGAP9 chr10 48189612 48237508 +ZNF488 chr10 48355024 48373866 +FRMPD2P1 chr10 48844036 48868504 +BMS1P5 chr10 48901102 48950972 +MAPK8 chr10 49514698 49647403 +ARHGAP22 chr10 49654077 49864310 +VSTM4 chr10 50222290 50323554 +FAM170B-AS1 chr10 50329884 50359592 +OGDHL chr10 50942689 50970425 +PARG chr10 51026325 51130715 +AGAP8 chr10 51224681 51371321 +NCOA4 chr10 51565108 51590734 +TIMM23 chr10 51592080 51623365 +AGAP6 chr10 51748078 51770259 +FAM21A chr10 51827648 51893269 +SGMS1 chr10 52065360 52384923 +CSTF2T chr10 53455247 53459355 +PCDH15 chr10 55562531 57387702 +MIR548F1 chr10 56367634 56367717 +MTRNR2L5 chr10 57358750 57360488 +ZWINT chr10 58116989 58121036 +CISD1 chr10 60028818 60049346 +UBE2D1 chr10 60094735 60130513 +TFAM chr10 60144782 60158981 +FAM133CP chr10 60475314 60476017 +PHYHIPL chr10 60936350 61007534 +FAM13C chr10 61005890 61122939 +CCDC6 chr10 61548521 61666414 +ANK3 chr10 61786056 62493248 +C10orf107 chr10 63422719 63526524 +ARID5B chr10 63661059 63856703 +ZNF365 chr10 64133951 64431771 +ADO chr10 64564516 64568238 +EGR2 chr10 64571756 64679660 +NRBF2 chr10 64893050 64914783 +JMJD1C chr10 64926981 65225722 +REEP3 chr10 65281123 65384883 +CTNNA3 chr10 67672276 69455927 +LRRTM3 chr10 68685764 68859588 +DNAJC12 chr10 69556427 69597924 +SIRT1 chr10 69644427 69678147 +HERC4 chr10 69681665 69835105 +PBLD chr10 70042417 70092806 
+HNRNPH3 chr10 70090931 70102948 +RUFY2 chr10 70100864 70167051 +SLC25A16 chr10 70237756 70287231 +TET1 chr10 70320413 70454239 +CCAR1 chr10 70480769 70552134 +DDX50 chr10 70661034 70706603 +DDX21 chr10 70715884 70744829 +KIAA1279 chr10 70748487 70776738 +SRGN chr10 70847862 70864567 +VPS26A chr10 70883268 70932617 +SUPV3L1 chr10 70939988 70968855 +HK1 chr10 71029740 71161638 +TSPAN15 chr10 71211229 71267425 +COL13A1 chr10 71561644 71724031 +TYSND1 chr10 71897737 71906432 +SAR1A chr10 71907045 71930279 +PPA1 chr10 71962586 71993667 +EIF4EBP2 chr10 72164135 72188374 +PALD1 chr10 72238577 72328205 +SGPL1 chr10 72575717 72640930 +PCBD1 chr10 72642037 72648541 +SLC29A3 chr10 73079015 73123142 +C10orf105 chr10 73471458 73497581 +C10orf54 chr10 73507316 73533255 +PSAP chr10 73576055 73611126 +CHST3 chr10 73724123 73773322 +ASCC1 chr10 73856278 73976892 +ANAPC16 chr10 73975787 73995618 +DDIT4 chr10 74033678 74035794 +DNAJB12 chr10 74092588 74114988 +MICU1 chr10 74127098 74385899 +MCU chr10 74451889 74647452 +P4HA1 chr10 74766975 74856732 +ECD chr10 74889913 74928813 +DNAJC9 chr10 74943120 75008620 +MRPS16 chr10 75006510 75012451 +ANXA7 chr10 75134859 75173834 +PPP3CB chr10 75196186 75255782 +USP54 chr10 75257296 75385711 +AGAP5 chr10 75434033 75457639 +BMS1P4 chr10 75475601 75490227 +SEC24C chr10 75504120 75531919 +FUT11 chr10 75532049 75540009 +CHCHD1 chr10 75541805 75543410 +NDST2 chr10 75561669 75571589 +CAMK2G chr10 75572259 75634343 +PLAU chr10 75668935 75677255 +C10orf55 chr10 75669727 75682535 +VCL chr10 75757872 75879918 +AP3M1 chr10 75881524 75910821 +ADK chr10 75910960 76469061 +KAT6B chr10 76585340 76792380 +SAMD8 chr10 76859344 76941881 +VDAC2 chr10 76969912 76991206 +ZNF503-AS1 chr10 77029577 77133258 +KCNMA1 chr10 78629359 79398353 +DLG5 chr10 79550549 79686378 +POLR3A chr10 79734907 79789303 +RPS24 chr10 79793518 79816570 +LINC00595 chr10 80027085 80090352 +ZMIZ1 chr10 80828792 81076276 +PPIF chr10 81107225 81115093 +ZCCHC24 chr10 81142081 81205383 +SFTPD 
chr10 81697496 81742370 +TMEM254 chr10 81838402 81852313 +FAM213A chr10 82167585 82192753 +TSPAN14 chr10 82213922 82292879 +GHITM chr10 85899196 85913001 +CDHR1 chr10 85954410 85979377 +RGR chr10 86004809 86019716 +CCSER2 chr10 86088342 86278273 +GRID1 chr10 87359312 88126250 +WAPAL chr10 88195013 88281572 +BMPR1A chr10 88516407 88692595 +AGAP11 chr10 88752163 88769960 +GLUD1 chr10 88810243 88854623 +FAM35A chr10 88853918 88951225 +MINPP1 chr10 89264632 89313217 +ATAD1 chr10 89511269 89601100 +PTEN chr10 89622870 89731687 +SNORD74 chr10 89754375 89754452 +STAMBPL1 chr10 90639491 90734910 +ACTA2 chr10 90694831 90751147 +FAS chr10 90750414 90775542 +CH25H chr10 90965694 90967071 +LIPA chr10 90973326 91174314 +IFIT2 chr10 91061712 91069033 +IFIT3 chr10 91087651 91100728 +IFIT1 chr10 91152303 91163745 +IFIT5 chr10 91174343 91180758 +PANK1 chr10 91342745 91405215 +RPP30 chr10 92631473 92668312 +PCGF5 chr10 92979908 93044088 +HECTD2 chr10 93170096 93274586 +TNKS2 chr10 93558069 93625033 +FGFBP3 chr10 93666346 93669240 +BTAF1 chr10 93683526 93790082 +CPEB3 chr10 93806449 94050844 +MARCH5 chr10 94050920 94113721 +IDE chr10 94211441 94333833 +EXOC6 chr10 94590935 94819250 +CYP26C1 chr10 94821021 94828454 +FRA10AC1 chr10 95427640 95462329 +PIPSL chr10 95717948 95721297 +NOC3L chr10 96075004 96122716 +PDLIM1 chr10 96997329 97050781 +SORBS1 chr10 97071528 97321171 +ALDH18A1 chr10 97365696 97416463 +TCTN3 chr10 97423158 97453900 +ENTPD1 chr10 97471536 97637023 +CCNJ chr10 97803151 97820627 +ZNF518A chr10 97889472 97965044 +BLNK chr10 97951458 98031344 +OPALIN chr10 98102973 98119092 +TM9SF3 chr10 98277866 98347209 +RPL13AP5 chr10 98510045 98510675 +LCOR chr10 98592017 98740800 +ARHGAP19-SLIT1 chr10 98757795 99052413 +SLIT1 chr10 98757795 98945677 +RRP12 chr10 99116115 99161127 +PGAM1 chr10 99185917 99193198 +EXOSC1 chr10 99195899 99205774 +ZDHHC16 chr10 99205927 99217127 +MMS19 chr10 99218081 99258551 +ANKRD2 chr10 99332198 99343641 +MORN4 chr10 99374310 99393344 +AVPI1 chr10 
99437181 99447080 +ZFYVE27 chr10 99496878 99520664 +CRTAC1 chr10 99624757 99790585 +GOLGA7B chr10 99627889 99631294 +R3HCC1L chr10 99894387 100004654 +HPS1 chr10 100175955 100206684 +GOT1 chr10 101156627 101190381 +NKX2-3 chr10 101292690 101296278 +SLC25A28 chr10 101370282 101380366 +CUTC chr10 101462315 101515891 +COX15 chr10 101471601 101491857 +DNMBP chr10 101635334 101769676 +ERLIN1 chr10 101909851 101948091 +CHUK chr10 101948055 101989376 +CWF19L1 chr10 101992055 102027437 +BLOC1S2 chr10 102033713 102046469 +SCD chr10 102106881 102124591 +LINC00263 chr10 102133372 102143125 +SEC31B chr10 102246399 102289628 +NDUFB8 chr10 102265385 102289638 +HIF1AN chr10 102288829 102319755 +FAM178A chr10 102672326 102724893 +MRPL43 chr10 102729215 102747272 +LZTS2 chr10 102756375 102767593 +SFXN3 chr10 102790991 102800998 +KAZALD1 chr10 102821598 102827888 +BTRC chr10 103113820 103317078 +DPCD chr10 103330317 103369425 +POLL chr10 103338639 103348027 +FBXW4 chr10 103370423 103455052 +NPM3 chr10 103541082 103543170 +MGEA5 chr10 103544200 103578696 +C10orf76 chr10 103605356 103815950 +LDB1 chr10 103867317 103880210 +PPRC1 chr10 103892787 103910082 +NOLC1 chr10 103911933 103923627 +GBF1 chr10 104005289 104142656 +FBXL15 chr10 104178946 104182893 +CUEDC2 chr10 104183002 104192418 +TMEM180 chr10 104221149 104236802 +ACTR1A chr10 104238986 104262482 +TRIM8 chr10 104404253 104418164 +ARL3 chr10 104433488 104474164 +SFXN2 chr10 104474295 104503249 +WBP1L chr10 104503727 104576021 +C10orf32 chr10 104613980 104624718 +AS3MT chr10 104629273 104661656 +NT5C2 chr10 104845940 104953056 +INA chr10 105036920 105050108 +PCGF6 chr10 105062553 105110891 +USMG5 chr10 105148798 105156223 +PDCD11 chr10 105156405 105206049 +OBFC1 chr10 105642300 105677963 +SFR1 chr10 105881816 105886143 +GSTO1 chr10 105995114 106027217 +GSTO2 chr10 106028631 106064703 +ITPRIP chr10 106071894 106098162 +XPNPEP1 chr10 111624524 111683311 +ADD3 chr10 111756126 111895323 +MXI1 chr10 111967363 112047123 +SMNDC1 chr10 
112050488 112064709 +SMC3 chr10 112327449 112364394 +RBM20 chr10 112404155 112599227 +PDCD4 chr10 112631565 112659764 +BBIP1 chr10 112658488 112679032 +SHOC2 chr10 112679301 112773425 +ACSL5 chr10 114133776 114188138 +ZDHHC6 chr10 114190058 114206672 +VTI1A chr10 114206756 114578503 +TCF7L2 chr10 114710009 114927437 +DCLRE1A chr10 115594488 115614142 +NHLRC2 chr10 115614420 115676953 +FAM160B1 chr10 116581503 116659591 +TRUB1 chr10 116697952 116737430 +GFRA1 chr10 117816444 118032979 +HSPA12A chr10 118430703 118502085 +ENO4 chr10 118609023 118671299 +KIAA1598 chr10 118643742 118886097 +CASC2 chr10 119805790 119969663 +FAM204A chr10 120065401 120101840 +CACUL1 chr10 120433679 120514761 +EIF3A chr10 120794356 120840316 +FAM45A chr10 120863598 120897496 +SFXN4 chr10 120900279 120925179 +PRDX3 chr10 120927215 120938345 +GRK5 chr10 120967101 121215131 +RGS10 chr10 121259340 121302220 +TIAL1 chr10 121334199 121356541 +INPP5F chr10 121485609 121588652 +MCMBP chr10 121588972 121652068 +SEC23IP chr10 121652223 121702014 +PPAPDC1A chr10 122216466 122349367 +WDR11-AS1 chr10 122521324 122610721 +WDR11 chr10 122610687 122669036 +ATE1 chr10 123499939 123688316 +NSMCE4A chr10 123716603 123734732 +TACC2 chr10 123748689 124014060 +PLEKHA1 chr10 124134212 124191867 +HTRA1 chr10 124221041 124274424 +C10orf88 chr10 124690419 124713919 +PSTK chr10 124713897 124757029 +IKZF5 chr10 124750322 124768333 +ACADSB chr10 124768495 124817827 +HMX3 chr10 124895478 124897257 +BUB3 chr10 124913793 124924886 +CHST15 chr10 125767184 125853206 +OAT chr10 126085872 126107545 +LHPP chr10 126150403 126306457 +FAM53B chr10 126307861 126432838 +METTL10 chr10 126436718 126480439 +FAM175B chr10 126490354 126525239 +CTBP2 chr10 126676421 126849739 +C10orf137 chr10 127408084 127452712 +UROS chr10 127477146 127511817 +BCCIP chr10 127512115 127542264 +DHX32 chr10 127524906 127585005 +C10orf90 chr10 128113566 128359079 +DOCK1 chr10 128593978 129250781 +CLRN3 chr10 129676105 129691211 +PTPRE chr10 129705325 
129884119 +GLRX3 chr10 131934663 131982785 +PPP2R2D chr10 133747955 133773331 +BNIP3 chr10 133781578 133795435 +DPYSL4 chr10 134000404 134019280 +LRRC27 chr10 134145614 134195010 +INPP5A chr10 134351324 134596979 +TTC40 chr10 134621896 134756327 +TUBGCP2 chr10 135093135 135125841 +ZNF511 chr10 135121979 135166033 +PRAP1 chr10 135160650 135166187 +ECHS1 chr10 135175984 135187193 +MTG1 chr10 135207598 135234811 +BET1L chr11 167784 207428 +RIC8A chr11 207511 215113 +SIRT3 chr11 215458 236931 +PSMD13 chr11 236546 252983 +IFITM2 chr11 307631 315272 +IFITM1 chr11 313506 315272 +B4GALNT4 chr11 369796 382116 +PTDSS2 chr11 448268 491393 +RNH1 chr11 494512 507300 +HRAS chr11 532242 537287 +C11orf35 chr11 554855 560779 +PHRF1 chr11 576486 612222 +IRF7 chr11 612553 615999 +DEAF1 chr11 644233 706715 +EPS8L2 chr11 694438 727727 +TMEM80 chr11 695428 705028 +TALDO1 chr11 747329 765024 +PDDC1 chr11 767220 777488 +CEND1 chr11 787104 790123 +SLC25A22 chr11 790475 798316 +PIDD chr11 799179 809753 +RPLP2 chr11 809647 812880 +PNPLA2 chr11 818902 825573 +EFCAB4A chr11 826144 831991 +CD151 chr11 832843 839831 +POLR2L chr11 837356 842545 +CHID1 chr11 867357 915058 +TOLLIP chr11 1295601 1330884 +BRSK2 chr11 1411129 1483919 +MOB2 chr11 1490687 1522477 +KRTAP5-6 chr11 1718425 1718985 +IFITM10 chr11 1753640 1771821 +CTSD chr11 1773982 1785222 +MRPL23 chr11 1968508 2005752 +H19 chr11 2016406 2022700 +TSPAN32 chr11 2323227 2339430 +CD81 chr11 2397407 2418649 +TSSC4 chr11 2421718 2425106 +KCNQ1OT1 chr11 2629558 2721224 +SLC22A18 chr11 2920951 2946476 +PHLDA2 chr11 2949503 2950685 +NAP1L4 chr11 2965667 3013607 +CARS chr11 3022152 3078843 +OSBPL5 chr11 3108346 3187969 +ZNF195 chr11 3360491 3400448 +TRPC2 chr11 3631131 3658789 +NUP98 chr11 3692313 3819022 +PGAP2 chr11 3818954 3847601 +RHOG chr11 3848208 3862213 +STIM1 chr11 3875757 4114439 +RRM1 chr11 4115937 4160106 +OR52B4 chr11 4388493 4389616 +TRIM68 chr11 4619902 4629489 +OR51E1 chr11 4664650 4676718 +OR51G1 chr11 4944531 4945637 +HBB chr11 
5246694 5250625 +HBD chr11 5253908 5256600 +OR51Q1 chr11 5443341 5444436 +TRIM34 chr11 5640994 5665628 +TRIM22 chr11 5710919 5758319 +OR52E4 chr11 5905501 5906527 +FAM160A2 chr11 6232565 6255941 +CNGA4 chr11 6255995 6265659 +SMPD1 chr11 6411655 6416228 +APBB1 chr11 6416355 6440644 +TRIM3 chr11 6469843 6495689 +ARFIP2 chr11 6496910 6502666 +TIMM10B chr11 6502677 6505909 +DNHD1 chr11 6518490 6614988 +RRP8 chr11 6616305 6624850 +ILK chr11 6624961 6632102 +TAF10 chr11 6627526 6633898 +TPP1 chr11 6634000 6640692 +MRPL17 chr11 6702013 6704632 +OR10A5 chr11 6866883 6867936 +OLFML1 chr11 7506619 7532608 +PPFIBP2 chr11 7534529 7678358 +EIF3F chr11 7991798 8023409 +TUB chr11 8040791 8127659 +RIC3 chr11 8127597 8190602 +LMO1 chr11 8245851 8290263 +RPL27A chr11 8703958 8736306 +ST5 chr11 8714898 8932498 +AKIP1 chr11 8932686 8941631 +TMEM9B chr11 8968841 8986558 +DENND5A chr11 9160372 9286937 +TMEM41B chr11 9302201 9336327 +IPO7 chr11 9406169 9469673 +ZNF143 chr11 9481866 9550071 +SBF2 chr11 9800214 10315754 +MTRNR2L8 chr11 10529434 10530723 +RNF141 chr11 10533225 10562777 +LYVE1 chr11 10578513 10633236 +CTR9 chr11 10772534 10801290 +EIF4G2 chr11 10818597 10830657 +ZBED5 chr11 10833621 10880343 +CSNK2A3 chr11 11373489 11374904 +USP47 chr11 11862970 11980870 +DKK3 chr11 11984653 12031316 +PARVA chr11 12398732 12552348 +TEAD1 chr11 12695969 12966298 +ARNTL chr11 13298199 13408813 +BTBD10 chr11 13409548 13484844 +FAR1 chr11 13690217 13753893 +COPB1 chr11 14464986 14521573 +PSMA1 chr11 14515329 14541890 +CYP2R1 chr11 14899553 14913798 +CALCA chr11 14988214 14993900 +SOX6 chr11 15987995 16761138 +C11orf58 chr11 16634679 16778428 +RPS13 chr11 17095936 17099334 +PIK3C2A chr11 17099277 17229530 +NUCB2 chr11 17229700 17371521 +KCNJ11 chr11 17407406 17410878 +ABCC8 chr11 17414432 17498449 +SERGEF chr11 17809595 18034709 +SAAL1 chr11 18091482 18127638 +MRGPRX4 chr11 18194384 18195827 +HPS5 chr11 18300223 18343745 +GTF2H1 chr11 18343842 18388591 +LDHA chr11 18415935 18429972 +TSG101 chr11 
18489883 18548779 +UEVLD chr11 18551156 18610294 +SPTY2D1 chr11 18627948 18656338 +TMEM86A chr11 18714669 18726332 +ZDHHC13 chr11 19138646 19197969 +CSRP3 chr11 19203578 19232120 +NAV2 chr11 19372271 20143144 +NAV2-AS5 chr11 19524219 19528776 +HTATIP2 chr11 20385231 20405329 +PRMT3 chr11 20409076 20530840 +FANCF chr11 22644079 22647387 +SVIP chr11 22835345 22851845 +LUZP2 chr11 24518516 25104150 +ANO3 chr11 26210829 26684835 +FIBIN chr11 27015628 27018630 +LIN7C chr11 27516123 27528320 +BDNF-AS chr11 27528385 27719721 +KIF18A chr11 28042167 28129855 +METTL15 chr11 28129795 28355054 +ARL14EP chr11 30344598 30359774 +MPPED2 chr11 30406040 30608419 +DNAJC24 chr11 31391387 31453396 +IMMP1L chr11 31453948 31531192 +ELP4 chr11 31531297 31805546 +RCN1 chr11 31833939 32127301 +EIF3M chr11 32605344 32627808 +QSER1 chr11 32914724 33014862 +DEPDC7 chr11 33037410 33055128 +TCP11L1 chr11 33060963 33127489 +CSTF3 chr11 33098734 33183917 +HIPK3 chr11 33278218 33378569 +CD59 chr11 33719807 33757991 +FBXO3 chr11 33762485 33796089 +CAPRIN1 chr11 34073230 34122703 +NAT10 chr11 34127149 34169217 +CAT chr11 34460472 34493609 +APIP chr11 34874641 34938046 +PDHX chr11 34937376 35042138 +SLC1A2 chr11 35272753 35441610 +PAMR1 chr11 35453370 35551848 +TRIM44 chr11 35684353 35829775 +LDLRAD3 chr11 35965531 36253686 +COMMD9 chr11 36295051 36310999 +TRAF6 chr11 36508577 36531822 +C11orf74 chr11 36616051 36694823 +LRRC4C chr11 40135753 41481323 +API5 chr11 43333513 43366079 +TTC17 chr11 43380482 43516483 +HSD17B12 chr11 43577986 43878167 +ALKBH3 chr11 43902361 43941816 +ACCSL chr11 44069531 44081527 +EXT2 chr11 44117099 44266979 +CD82 chr11 44585977 44641913 +TP53I11 chr11 44907454 44972840 +SLC35C1 chr11 45825623 45834566 +CRY2 chr11 45868669 45904798 +MAPK8IP1 chr11 45907202 45928016 +PEX16 chr11 45931220 45940363 +PHF21A chr11 45950871 46142985 +MDK chr11 46402306 46405375 +AMBRA1 chr11 46417964 46615675 +HARBI1 chr11 46624411 46639459 +ATG13 chr11 46638826 46696368 +ARHGAP1 chr11 46698630 
46722165 +ZNF408 chr11 46722368 46727462 +CKAP5 chr11 46764598 46867847 +MIR5582 chr11 46774675 46774742 +LRP4 chr11 46878419 46940193 +C11orf49 chr11 46958240 47185936 +ARFGAP2 chr11 47185848 47198676 +ACP2 chr11 47260853 47270457 +NR1H3 chr11 47269851 47290396 +MADD chr11 47290712 47351582 +SLC39A13 chr11 47428683 47438047 +PSMC3 chr11 47440320 47447993 +CELF1 chr11 47487496 47587121 +NDUFS3 chr11 47586888 47606114 +PTPMT1 chr11 47586982 47595013 +KBTBD4 chr11 47593749 47600567 +FAM180B chr11 47608198 47610746 +MTCH2 chr11 47638867 47664175 +FNBP4 chr11 47738072 47788995 +NUP160 chr11 47799639 47870107 +PTPRJ chr11 48002113 48189670 +OR4S1 chr11 48327775 48328704 +FOLH1 chr11 49168187 49230222 +TRIM48 chr11 55029658 55038595 +OR5D13 chr11 55540914 55541858 +OR7E5P chr11 55746179 55753881 +OR5T3 chr11 56019676 56020698 +OR5M8 chr11 56257825 56258871 +LRRC55 chr11 56949221 56959191 +SSRP1 chr11 57093459 57103351 +SLC43A3 chr11 57174427 57195053 +TIMM10 chr11 57295936 57298276 +UBE2L6 chr11 57319129 57335757 +YPEL4 chr11 57412560 57417417 +CLP1 chr11 57416465 57429340 +ZDHHC5 chr11 57435219 57468659 +MED19 chr11 57471186 57479693 +TMX2 chr11 57480072 57508445 +TMX2-CTNND1 chr11 57480077 57559058 +C11orf31 chr11 57508825 57510986 +OR10Q1 chr11 57995354 57996390 +LPXN chr11 58294344 58345693 +ZFP91 chr11 58346584 58388515 +CNTF chr11 58390146 58393198 +OR5A1 chr11 59210617 59211667 +PATL1 chr11 59404189 59436453 +OR10V2P chr11 59516284 59517047 +MRPL16 chr11 59573608 59578345 +MS4A6A chr11 59939081 59952139 +MS4A4A chr11 60048014 60076445 +MS4A7 chr11 60145955 60163424 +CCDC86 chr11 60609544 60618554 +PTGDR2 chr11 60618413 60623444 +TMEM109 chr11 60681346 60690915 +TMEM132A chr11 60691935 60704631 +PGA4 chr11 60989688 61018929 +DDB1 chr11 61066923 61110068 +DAK chr11 61100682 61120767 +TMEM138 chr11 61129473 61136981 +TMEM216 chr11 61159159 61166335 +CPSF7 chr11 61170121 61197503 +SDHAF2 chr11 61197514 61215001 +MIR4488 chr11 61276068 61276129 +MYRF chr11 61520114 
61555990 +TMEM258 chr11 61535973 61560274 +FEN1 chr11 61560109 61564716 +FADS2 chr11 61560452 61634826 +FADS1 chr11 61567099 61596790 +BEST1 chr11 61717293 61732987 +FTH1 chr11 61727190 61735132 +SCGB2A2 chr11 62037627 62040628 +ASRGL1 chr11 62104920 62160882 +EEF1G chr11 62327073 62342401 +TUT1 chr11 62342517 62359649 +MTA2 chr11 62360686 62369312 +ROM1 chr11 62379194 62382592 +B3GAT3 chr11 62382768 62389647 +GANAB chr11 62392298 62414104 +INTS5 chr11 62414320 62420774 +C11orf48 chr11 62430287 62439727 +C11orf83 chr11 62437745 62441159 +UBXN1 chr11 62443970 62446567 +BSCL2 chr11 62457747 62477317 +HNRNPUL2-BSCL2 chr11 62457747 62494856 +TTC9C chr11 62495541 62507765 +ZBTB3 chr11 62515791 62521660 +POLR2G chr11 62529016 62534182 +TAF6L chr11 62538775 62554814 +TMEM223 chr11 62539101 62559493 +TMEM179B chr11 62554887 62557877 +NXF1 chr11 62559595 62573774 +STX5 chr11 62574369 62599560 +WDR74 chr11 62599814 62609281 +SNHG1 chr11 62619460 62623386 +SLC3A2 chr11 62623518 62656352 +SLC22A9 chr11 63137261 63177766 +RARRES3 chr11 63304281 63313934 +PLA2G16 chr11 63340667 63384355 +RTN3 chr11 63448918 63527363 +MARK2 chr11 63606400 63678491 +NAA40 chr11 63706431 63724800 +COX8A chr11 63742079 63744015 +OTUB1 chr11 63753325 63769283 +FLRT1 chr11 63870660 63886645 +STIP1 chr11 63952744 63972015 +FERMT3 chr11 63974150 63991354 +TRPT1 chr11 63991271 63993726 +NUDT22 chr11 63993413 63998158 +DNAJC4 chr11 63997750 64001758 +VEGFB chr11 64002010 64006259 +FKBP2 chr11 64008475 64011604 +BAD chr11 64037302 64052176 +GPR137 chr11 64037534 64056972 +ESRRA chr11 64073044 64084215 +TRMT112 chr11 64083932 64085556 +PRDX5 chr11 64085560 64089283 +NRXN2 chr11 64373646 64490660 +PYGM chr11 64513861 64527769 +SF1 chr11 64532078 64546258 +EHD1 chr11 64619114 64655768 +GPHA2 chr11 64701943 64703360 +ARL2 chr11 64781585 64789656 +SNX15 chr11 64794880 64808044 +ZFPL1 chr11 64851695 64855870 +VPS51 chr11 64856796 64879332 +TM7SF2 chr11 64879317 64883856 +ZNHIT2 chr11 64883875 64885170 +FAU chr11 
64888100 64889945 +SYVN1 chr11 64889252 64902004 +MRPL49 chr11 64889655 64894843 +CAPN1 chr11 64948037 64979477 +POLA2 chr11 65029233 65073060 +CDC42EP2 chr11 65082289 65089900 +DPF2 chr11 65101225 65120720 +FRMD8 chr11 65154070 65180996 +NEAT1 chr11 65190245 65213011 +MALAT1 chr11 65265233 65273940 +SCYL1 chr11 65292548 65306175 +LTBP3 chr11 65306276 65326401 +SSSCA1 chr11 65337901 65341413 +EHBP1L1 chr11 65343509 65360121 +MAP3K11 chr11 65365226 65382853 +PCNXL3 chr11 65383244 65404910 +RELA chr11 65421067 65430565 +KAT5 chr11 65479467 65487075 +RNASEH2C chr11 65482367 65488418 +AP5B1 chr11 65543364 65548273 +CFL1 chr11 65590493 65629497 +MUS81 chr11 65624597 65635124 +FIBP chr11 65651212 65656010 +CCDC85B chr11 65657875 65659105 +C11orf68 chr11 65684279 65686588 +DRAP1 chr11 65686728 65689032 +SART1 chr11 65729160 65747299 +EIF1AD chr11 65764016 65769647 +BANF1 chr11 65769550 65771620 +SF3B2 chr11 65818200 65836779 +PACS1 chr11 65837834 66012218 +KLC2 chr11 66024765 66035331 +RAB1B chr11 66036004 66044963 +YIF1A chr11 66052051 66056641 +BRMS1 chr11 66104804 66112596 +B3GNT1 chr11 66112843 66115163 +SLC29A2 chr11 66129992 66139685 +MRPL11 chr11 66202546 66234209 +DPP3 chr11 66247484 66277130 +BBS1 chr11 66278077 66301098 +ZDHHC24 chr11 66288108 66313709 +CTSF chr11 66330934 66336312 +CCDC87 chr11 66357640 66360554 +CCS chr11 66360292 66373490 +RBM14 chr11 66384053 66394818 +RBM14-RBM4 chr11 66384097 66413940 +RBM4 chr11 66384097 66434153 +RBM4B chr11 66432469 66445392 +RCE1 chr11 66610306 66614017 +PC chr11 66615704 66725847 +LRFN4 chr11 66624118 66627946 +KDM2A chr11 66886740 67025558 +ANKRD13D chr11 67056018 67069956 +POLD4 chr11 67118248 67124443 +RAD9A chr11 67159176 67165881 +PPP1CA chr11 67165654 67188654 +CARNS1 chr11 67182439 67193078 +RPS6KB2 chr11 67195931 67202872 +CORO1B chr11 67205519 67211292 +TMEM134 chr11 67231824 67236743 +AIP chr11 67250512 67258574 +CDK2AP2 chr11 67273968 67276120 +GSTP1 chr11 67351066 67354131 +NDUFV1 chr11 67374323 67380006 
+NUDT8 chr11 67395409 67397401 +ACY3 chr11 67410026 67418130 +ALDH3B2 chr11 67429633 67448671 +UNC93B1 chr11 67758575 67772452 +NDUFS8 chr11 67798084 67804111 +TCIRG1 chr11 67806483 67818362 +CHKA chr11 67820326 67888911 +SUV420H1 chr11 67922330 67981295 +C11orf24 chr11 68028803 68039469 +LRP5 chr11 68080077 68216743 +PPP6R3 chr11 68228186 68382802 +CPT1A chr11 68522088 68611878 +MRPL21 chr11 68658744 68671303 +IGHMBP2 chr11 68671310 68708070 +MYEOV chr11 69061605 69182494 +CCND1 chr11 69455855 69469242 +ORAOV1 chr11 69467844 69490184 +FADD chr11 70049269 70053496 +PPFIA1 chr11 70116806 70230509 +CTTN chr11 70244510 70282690 +DHCR7 chr11 71139239 71163914 +NADSYN1 chr11 71164155 71235153 +KRTAP5-10 chr11 71276609 71279980 +RNF121 chr11 71639747 71708643 +IL18BP chr11 71709587 71716761 +NUMA1 chr11 71713910 71791739 +LRTOMT chr11 71791382 71821828 +LAMTOR1 chr11 71796941 71814433 +ANAPC15 chr11 71817424 71823826 +FOLR2 chr11 71927645 71932994 +CLPB chr11 72003469 72145692 +PDE2A chr11 72287185 72385635 +ARAP1 chr11 72396114 72504644 +STARD10 chr11 72465774 72504726 +ATG16L2 chr11 72525353 72554719 +FCHSD2 chr11 72547790 72853306 +RELT chr11 73087309 73108519 +FAM168A chr11 73111532 73309234 +PLEKHB1 chr11 73357223 73373864 +RAB6A chr11 73386683 73472182 +MRPL48 chr11 73498361 73576178 +COA4 chr11 73583712 73588033 +PAAF1 chr11 73587744 73638790 +UCP2 chr11 73685712 73694352 +C2CD3 chr11 73723763 73882255 +PPME1 chr11 73882144 73965748 +P4HA3 chr11 73946846 74022702 +PGM2L1 chr11 74041363 74109518 +POLD3 chr11 74204896 74380162 +XRRA1 chr11 74518784 74660245 +SPCS2 chr11 74660292 74690076 +SLCO2B1 chr11 74811608 74917594 +RPS3 chr11 75110530 75133324 +SNORD15B chr11 75115465 75115610 +UVRAG chr11 75526212 75854239 +PRKRIR chr11 76061000 76092015 +C11orf30 chr11 76155967 76264069 +ACER3 chr11 76571911 76737841 +PAK1 chr11 77032752 77185680 +CLNS1A chr11 77225981 77348850 +AQP11 chr11 77300436 77321400 +RSF1 chr11 77371041 77532063 +AAMDC chr11 77532155 77629478 +INTS4 
chr11 77589766 77705724 +NDUFC2 chr11 77779350 77791265 +ALG8 chr11 77811982 77850706 +GAB2 chr11 77926343 78129394 +NARS2 chr11 78147007 78285919 +PRCP chr11 82534544 82681626 +RAB30 chr11 82684175 82782965 +PCF11 chr11 82868030 82898493 +CCDC90B chr11 82970139 82997450 +DLG2 chr11 83166055 85338966 +TMEM126B chr11 85339629 85347580 +TMEM126A chr11 85359011 85367591 +CREBZF chr11 85370752 85393951 +PICALM chr11 85668727 85780924 +EED chr11 85955586 85989855 +C11orf73 chr11 86013253 86056969 +ME3 chr11 86152150 86383678 +OR7E2P chr11 86568101 86568989 +TMEM135 chr11 86748886 87034800 +CTSC chr11 88026760 88070955 +TRIM77 chr11 89443467 89451040 +CHORDC1 chr11 89934328 89956532 +SLC36A4 chr11 92877341 92931130 +TAF1D chr11 93463114 93517557 +SNORD6 chr11 93464668 93464740 +C11orf54 chr11 93474757 93497915 +MED17 chr11 93517393 93547861 +PANX1 chr11 93862094 93915138 +FOLR4 chr11 94038759 94040887 +MRE11A chr11 94152895 94227074 +ANKRD49 chr11 94226795 94232749 +CWC15 chr11 94695787 94706776 +SRSF8 chr11 94800361 94802388 +ENDOD1 chr11 94822974 94865809 +FAM76B chr11 95502106 95523573 +CEP57 chr11 95523129 95565857 +MTMR2 chr11 95566046 95658479 +MAML2 chr11 95709762 96076344 +CCDC82 chr11 96085933 96123087 +JRKL chr11 96123153 96240738 +JRKL-AS1 chr11 96180296 96239990 +YAP1 chr11 101981192 102104154 +BIRC2 chr11 102217942 102249401 +TMEM123 chr11 102267063 102341115 +MMP1 chr11 102660651 102668891 +CASP4 chr11 104813593 104840163 +CASP1 chr11 104896170 104972158 +GRIA4 chr11 105480721 105852819 +MSANTD4 chr11 105866350 105893130 +AASDHPPT chr11 105946228 105969437 +GUCY1A2 chr11 106544738 106889250 +CWF19L2 chr11 107197071 107328572 +ELMOD1 chr11 107461817 107537505 +SLC35F2 chr11 107661717 107799019 +RAB39A chr11 107799229 107834208 +CUL5 chr11 107879459 107978503 +ACAT1 chr11 107992243 108018503 +NPAT chr11 108027942 108093369 +ATM chr11 108093211 108239829 +DDX10 chr11 108535752 108811657 +RDX chr11 110045605 110167447 +FDX1 chr11 110300607 110335605 +C11orf53 
chr11 111126707 111157126 +LAYN chr11 111411005 111432264 +SIK2 chr11 111473115 111601577 +PPP2R1B chr11 111597632 111637151 +ALG9 chr11 111652919 111742305 +FDXACB1 chr11 111744780 111751967 +C11orf1 chr11 111749659 111756699 +CRYAB chr11 111779289 111794446 +DIXDC1 chr11 111797868 111893308 +DLAT chr11 111895538 111935114 +C11orf57 chr11 111944810 111955874 +TIMM8B chr11 111955524 111957522 +SDHD chr11 111957497 111990353 +IL18 chr11 112013974 112034840 +PTS chr11 112097088 112140678 +NCAM1 chr11 112831997 113149158 +ANKK1 chr11 113258513 113271140 +TMPRSS5 chr11 113558272 113577095 +ZW10 chr11 113603909 113644533 +USP28 chr11 113668596 113746292 +ZBTB16 chr11 113930315 114121398 +NNMT chr11 114128509 114184007 +C11orf71 chr11 114262165 114271139 +RBM7 chr11 114270752 114284925 +REXO2 chr11 114310108 114321001 +CADM1 chr11 115039938 115375675 +BUD13 chr11 116618886 116643704 +ZNF259 chr11 116648436 116658766 +APOA5 chr11 116660083 116663136 +PAFAH1B2 chr11 117014983 117047610 +SIDT2 chr11 117049449 117068160 +PCSK7 chr11 117075053 117103241 +RNF214 chr11 117103341 117157161 +BACE1 chr11 117156402 117186975 +DSCAML1 chr11 117298489 117688240 +FXYD6 chr11 117707693 117748201 +IL10RA chr11 117857063 117872196 +SCN4B chr11 118004092 118023603 +UBE4A chr11 118230300 118269926 +ATP5L chr11 118271869 118302211 +TTC36 chr11 118398187 118401912 +TMEM25 chr11 118401756 118417995 +IFT46 chr11 118415243 118443685 +ARCN1 chr11 118443105 118473748 +PHLDB1 chr11 118477155 118528741 +DDX6 chr11 118620034 118661858 +UPK2 chr11 118795873 118829269 +CCDC84 chr11 118868852 118886501 +RPL23AP64 chr11 118873722 118874156 +RPS25 chr11 118886422 118889401 +TRAPPC4 chr11 118889142 118896164 +SLC37A4 chr11 118894824 118901616 +HYOU1 chr11 118914900 118927913 +VPS11 chr11 118938403 118952688 +HMBS chr11 118955576 118964259 +H2AFX chr11 118964564 118966177 +DPAGT1 chr11 118967213 118979041 +HINFP chr11 118992297 119006752 +CBL chr11 119076752 119178859 +MCAM chr11 119179241 119192231 +RNF26 
chr11 119205237 119208023 +USP2 chr11 119225925 119252436 +THY1 chr11 119288090 119295695 +PVRL1 chr11 119494120 119599794 +TMEM136 chr11 120195838 120204391 +SORL1 chr11 121322912 121504402 +UBASH3B chr11 122526383 122685181 +HSPA8 chr11 122928197 122933938 +GRAMD1B chr11 123396344 123498482 +SCN3B chr11 123499895 123525952 +ZNF202 chr11 123594885 123612383 +OR6X1 chr11 123624196 123625253 +OR10G7 chr11 123908773 123909717 +VWA5A chr11 123986069 124018428 +OR8B12 chr11 124412578 124413575 +TBRG1 chr11 124492732 124505287 +SIAE chr11 124503009 124565603 +ROBO3 chr11 124735282 124751366 +HEPACAM chr11 124789089 124806308 +HEPN1 chr11 124789146 124790573 +SLC37A2 chr11 124932963 124959131 +FEZ1 chr11 125315646 125366213 +EI24 chr11 125439112 125454575 +STT3A chr11 125461607 125495110 +PUS3 chr11 125763381 125773116 +DDX25 chr11 125773271 125793158 +CDON chr11 125825691 125933230 +RPUSD4 chr11 126071993 126081587 +FAM118B chr11 126081309 126132881 +SRPR chr11 126132814 126139039 +FOXRED1 chr11 126138950 126148026 +TIRAP chr11 126152960 126168740 +DCPS chr11 126173647 126215644 +ST3GAL4 chr11 126225535 126310239 +MIR3167 chr11 126858354 126858438 +ETS1 chr11 128328656 128457453 +ARHGAP32 chr11 128834955 129149219 +BARX2 chr11 129245835 129322171 +NFRKB chr11 129733531 129765485 +APLP2 chr11 129939732 130014699 +ZBTB44 chr11 130096572 130184581 +C11orf44 chr11 130542851 130587247 +SNX19 chr11 130745331 130786404 +NTM chr11 131240373 132206716 +IGSF9B chr11 133778459 133826880 +JAM3 chr11 133938820 134021896 +NCAPD3 chr11 134020014 134095348 +VPS26B chr11 134094539 134117686 +THYN1 chr11 134118173 134123264 +ACAD8 chr11 134123389 134135749 +B3GAT1 chr11 134248398 134281812 +KDM5A chr12 389295 498620 +CCDC77 chr12 498439 551811 +NINJ2 chr12 673462 772945 +WNK1 chr12 861759 1020618 +RAD52 chr12 1021243 1099219 +ERC1 chr12 1099675 1605099 +ADIPOR2 chr12 1797740 1897844 +CACNA2D4 chr12 1901123 2028002 +DCP1B chr12 2055220 2113701 +FKBP4 chr12 2904119 2914576 +ITFG2 chr12 
2921788 2968957 +NRIP2 chr12 2934514 2944710 +FOXM1 chr12 2966847 2986206 +RHNO1 chr12 2985424 2998626 +TULP3 chr12 2986389 3050306 +PARP11 chr12 3900213 3982608 +CCND2 chr12 4382938 4414516 +C12orf5 chr12 4430371 4462338 +FGF23 chr12 4477393 4488894 +C12orf4 chr12 4596894 4647674 +DYRK4 chr12 4671370 4723325 +NDUFA9 chr12 4758261 4798454 +KCNA5 chr12 5153085 5155949 +CD9 chr12 6308881 6347425 +TNFRSF1A chr12 6437923 6451280 +CD27-AS1 chr12 6548167 6560733 +CD27 chr12 6554033 6560884 +TAPBPL chr12 6560856 6575683 +VAMP1 chr12 6571403 6580153 +MRPL51 chr12 6601150 6603007 +GAPDH chr12 6643093 6647537 +IFFO1 chr12 6647541 6665239 +NOP2 chr12 6666029 6677857 +CHD4 chr12 6679249 6716642 +LPAR5 chr12 6728001 6745613 +ING4 chr12 6759446 6772314 +ZNF384 chr12 6775643 6798738 +COPS7A chr12 6832907 6841041 +MLF2 chr12 6857170 6876641 +PTMS chr12 6874682 6880116 +CD4 chr12 6896024 6929974 +GPR162 chr12 6930711 6939136 +USP5 chr12 6961292 6975796 +TPI1 chr12 6976283 6980112 +LRRC23 chr12 6982733 7023407 +ENO2 chr12 7022909 7032861 +ATN1 chr12 7033626 7051484 +C12orf57 chr12 7052141 7055166 +PTPN6 chr12 7055631 7070479 +EMG1 chr12 7072408 7105520 +PHB2 chr12 7074490 7079988 +LPCAT3 chr12 7085348 7125814 +C1S chr12 7096351 7178336 +CLSTN3 chr12 7282294 7311541 +PEX5 chr12 7341281 7371170 +APOBEC1 chr12 7801996 7818499 +NANOG chr12 7940390 7948655 +SLC2A3 chr12 8071826 8088871 +FOXJ2 chr12 8185299 8208099 +C3AR1 chr12 8210898 8219067 +NECAP1 chr12 8234807 8250367 +CLEC4A chr12 8276228 8291203 +FAM66C chr12 8332805 8368747 +AICDA chr12 8754762 8765467 +RIMKLB chr12 8834196 8935691 +PHC1 chr12 9066492 9094063 +M6PR chr12 9092959 9102551 +A2M chr12 9220260 9268825 +PZP chr12 9301436 9360966 +CLEC2D chr12 9817565 9848413 +CD69 chr12 9905082 9913497 +KLRF2 chr12 10034088 10048432 +CLEC7A chr12 10269376 10282857 +OLR1 chr12 10310902 10324737 +GABARAPL1 chr12 10365057 10375727 +KLRD1 chr12 10378657 10469850 +KLRC3 chr12 10564911 10573194 +KLRC2 chr12 10579453 10594899 +MAGOHB chr12 
10758612 10766222 +STYK1 chr12 10771538 10826917 +TAS2R50 chr12 11138512 11139511 +PRB1 chr12 11504757 11548500 +LRP6 chr12 12268959 12419946 +MANSC1 chr12 12482198 12503475 +LOH12CR2 chr12 12508342 12510001 +LOH12CR1 chr12 12510013 12619840 +DUSP16 chr12 12628829 12715317 +CREBL2 chr12 12764761 12798042 +GPR19 chr12 12813825 12849141 +CDKN1B chr12 12867992 12875305 +DDX47 chr12 12966250 12982915 +HEBP1 chr12 13127798 13153207 +HTR7P1 chr12 13152812 13155768 +KIAA1467 chr12 13197218 13295455 +GSG1 chr12 13236494 13256619 +ATF7IP chr12 14518610 14651697 +H2AFJ chr12 14927270 14930936 +WBP11 chr12 14939410 14956474 +ARHGDIB chr12 15094951 15114662 +PTPRO chr12 15475331 15750333 +EPS8 chr12 15773092 16035263 +STRAP chr12 16035325 16056412 +DERA chr12 16064106 16190220 +PLEKHA5 chr12 19282648 19529334 +AEBP2 chr12 19556979 19873735 +PYROXD1 chr12 21590549 21623300 +RECQL chr12 21621845 21654603 +GOLT1B chr12 21654715 21671342 +LDHB chr12 21788276 21910791 +ABCC9 chr12 21950335 22094336 +CMAS chr12 22199108 22218608 +ST8SIA1 chr12 22216707 22589975 +C2CD5 chr12 22601517 22697480 +ETNK1 chr12 22778009 22843599 +LRMP chr12 25173936 25261268 +LYRM5 chr12 25348150 25362579 +KRAS chr12 25357723 25403870 +IFLTD1 chr12 25562241 25801513 +BHLHE41 chr12 26272959 26278060 +SSPN chr12 26274924 26452223 +ITPR2 chr12 26490342 26986131 +ASUN chr12 27058114 27091259 +FGFR1OP2 chr12 27091316 27119583 +TM7SF3 chr12 27126128 27167367 +MED21 chr12 27175479 27219276 +STK38L chr12 27396901 27478892 +MRPS35 chr12 27863706 27909228 +KLHL42 chr12 27932953 27955973 +CCDC91 chr12 28286182 28732883 +ERGIC2 chr12 29490285 29534122 +IPO8 chr12 30781922 30848920 +CAPRIN2 chr12 30862486 30907885 +DDX11 chr12 31226779 31257725 +FAM60A chr12 31433518 31479992 +DENND5B chr12 31535157 31744031 +DENND5B-AS1 chr12 31742857 31768600 +AMN1 chr12 31824071 31882108 +KIAA1551 chr12 32112304 32146039 +BICD1 chr12 32259769 32536567 +FGD4 chr12 32552463 32798984 +DNM1L chr12 32832134 32898486 +YARS2 chr12 32880424 
32908836 +SYT10 chr12 33527173 33592754 +KIF21A chr12 39687030 39837192 +SLC2A13 chr12 40148823 40499891 +CNTN1 chr12 41086244 41466220 +YAF2 chr12 42550906 42632151 +PPHLN1 chr12 42632249 42853517 +ZCRB1 chr12 42705880 42719920 +PUS7L chr12 44122410 44152620 +IRAK4 chr12 44152747 44183346 +TWF1 chr12 44187526 44200178 +TMEM117 chr12 44229770 44783545 +ANO6 chr12 45609770 45834187 +ARID2 chr12 46123448 46301823 +SCAF11 chr12 46312914 46385903 +SLC38A1 chr12 46576846 46663800 +SLC38A2 chr12 46751972 46766650 +SLC38A4 chr12 47158546 47226191 +RPAP3 chr12 48057070 48099844 +SLC48A1 chr12 48147699 48176536 +HDAC7 chr12 48176505 48226915 +SENP1 chr12 48436681 48500091 +PFKM chr12 48498922 48540187 +ASB8 chr12 48541571 48574996 +ZNF641 chr12 48733791 48745197 +KANSL2 chr12 49047184 49076021 +CCNT1 chr12 49082247 49110681 +ADCY6 chr12 49159975 49182820 +DDX23 chr12 49223547 49246625 +RND1 chr12 49250928 49259681 +ARF3 chr12 49329506 49351334 +DDN chr12 49388932 49393092 +PRKAG1 chr12 49396057 49412980 +LMBR1L chr12 49490919 49504683 +TUBA1B chr12 49521565 49525180 +TUBA1A chr12 49578579 49583107 +TUBA1C chr12 49582519 49667114 +C1QL4 chr12 49726200 49730971 +SPATS2 chr12 49760367 49921205 +MCRS1 chr12 49950327 49961936 +PRPF40B chr12 49962001 50038449 +FMNL3 chr12 50031724 50101948 +TMBIM6 chr12 50101508 50158717 +BCDIN3D-AS1 chr12 50222325 50234926 +BCDIN3D chr12 50231573 50236912 +FAIM2 chr12 50260679 50298000 +AQP6 chr12 50360977 50370922 +RACGAP1 chr12 50370706 50426919 +ASIC1 chr12 50451331 50477394 +SMARCD1 chr12 50478755 50494495 +GPD1 chr12 50497602 50505102 +COX14 chr12 50505762 50514240 +CERS5 chr12 50523575 50561288 +LIMA1 chr12 50569571 50677329 +LARP4 chr12 50786166 50873787 +DIP2B chr12 50898768 51142450 +ATF1 chr12 51157493 51214905 +METTL7A chr12 51317255 51326300 +HIGD1C chr12 51347705 51364289 +SLC11A2 chr12 51373184 51422349 +LETMD1 chr12 51441745 51454207 +CSRNP2 chr12 51454990 51477447 +TFCP2 chr12 51487446 51566926 +POU6F1 chr12 51580719 51611477 
+DAZAP2 chr12 51632076 51665146 +BIN2 chr12 51674822 51718452 +GALNT6 chr12 51745031 51786651 +SLC4A8 chr12 51785101 51902980 +NR4A1 chr12 52416616 52453291 +C12orf44 chr12 52463030 52471278 +LINC00592 chr12 52604714 52617588 +KRT6C chr12 52862300 52867569 +KRT76 chr12 53161939 53171129 +EIF4B chr12 53399942 53435993 +TENC1 chr12 53440753 53458156 +SPRYD3 chr12 53458388 53473204 +IGFBP6 chr12 53491220 53496129 +CSAD chr12 53551447 53575135 +MFSD5 chr12 53645035 53648189 +PFDN5 chr12 53689075 53693234 +C12orf10 chr12 53693470 53700961 +AAAS chr12 53701240 53718648 +PRR13 chr12 53835389 53840429 +PCBP2 chr12 53835525 53874946 +MAP3K12 chr12 53874274 53893847 +TARBP2 chr12 53894705 53900215 +ATF7 chr12 53901640 54020199 +ATP5G2 chr12 54026510 54071192 +CALCOCO1 chr12 54104903 54121529 +HOXC9 chr12 54388679 54397121 +SMUG1 chr12 54558529 54582778 +CBX5 chr12 54624724 54673886 +HNRNPA1 chr12 54673977 54680872 +COPZ1 chr12 54694986 54745633 +NCKAP1L chr12 54891495 54937726 +LACRT chr12 55024595 55028679 +OR6C3 chr12 55725485 55726420 +METTL7B chr12 56075330 56078395 +BLOC1S1 chr12 56109820 56113871 +CD63 chr12 56119107 56123491 +SARNP chr12 56146247 56211540 +ORMDL2 chr12 56211703 56215663 +DNAJC14 chr12 56214744 56224608 +TMEM198B chr12 56223529 56230030 +MMP19 chr12 56229217 56236750 +WIBG chr12 56295197 56326402 +DGKA chr12 56321103 56347811 +RAB5B chr12 56367697 56388490 +SUOX chr12 56390964 56400425 +RPS26 chr12 56435637 56438116 +ERBB3 chr12 56473641 56497289 +PA2G4 chr12 56498103 56507691 +RPL41 chr12 56510370 56511727 +ZC3H10 chr12 56511943 56516278 +ESYT1 chr12 56512034 56538455 +MYL6B chr12 56546040 56553431 +MYL6 chr12 56551945 56557280 +SMARCC2 chr12 56556767 56583351 +RNF41 chr12 56598285 56615717 +NABP2 chr12 56615799 56623638 +ANKRD52 chr12 56631591 56652175 +COQ10A chr12 56660642 56664750 +CS chr12 56665483 56694176 +CNPY2 chr12 56703626 56710120 +PAN2 chr12 56710007 56727837 +STAT2 chr12 56735381 56753939 +SPRYD4 chr12 56862301 56864763 +RBMS2 chr12 
56915713 56984745 +BAZ2A chr12 56989380 57030600 +ATP5B chr12 57031959 57039798 +PTGES3 chr12 57057127 57082159 +NACA chr12 57106212 57125412 +PRIM1 chr12 57125380 57146157 +HSD17B6 chr12 57145945 57181574 +SNORA48 chr12 57255159 57255292 +LRP1 chr12 57522276 57607134 +SHMT2 chr12 57623110 57628718 +R3HDM2 chr12 57643392 57824788 +ARHGAP9 chr12 57866038 57882597 +MARS chr12 57869228 57911352 +DDIT3 chr12 57910371 57914300 +DCTN2 chr12 57923885 57941114 +KIF5A chr12 57943781 57980415 +PIP4K2C chr12 57984957 57997198 +DTX3 chr12 57998405 58003587 +B4GALNT1 chr12 58017193 58027138 +OS9 chr12 58087738 58115340 +AGAP2 chr12 58118980 58135940 +TSPAN31 chr12 58131796 58143994 +CDK4 chr12 58141510 58149796 +METTL1 chr12 58162254 58166576 +TSFM chr12 58176372 58201854 +AVIL chr12 58191159 58212487 +CTDSP2 chr12 58213710 58240522 +USP15 chr12 62654119 62811211 +MON2 chr12 62860597 62991363 +MIRLET7I chr12 62997466 62997550 +PPM1H chr12 63037762 63328817 +TMEM5 chr12 64173583 64203338 +SRGAP1 chr12 64238073 64541613 +C12orf66 chr12 64580096 64616076 +XPOT chr12 64798130 64844907 +TBK1 chr12 64845660 64895888 +RASSF3 chr12 65004293 65091347 +GNS chr12 65107225 65153227 +LEMD3 chr12 65563351 65642107 +LLPH chr12 66516842 66524548 +TMBIM4 chr12 66517709 66563852 +CAND1 chr12 67663061 67713731 +DYRK2 chr12 68042118 68059186 +MDM1 chr12 68666223 68726161 +RAP1B chr12 69004619 69054372 +NUP107 chr12 69080514 69136785 +SLC35E3 chr12 69139886 69187508 +MDM2 chr12 69201956 69239214 +CPM chr12 69235977 69365350 +CPSF6 chr12 69633317 69668138 +LYZ chr12 69742121 69748014 +YEATS4 chr12 69753483 69784576 +FRS2 chr12 69864129 69973562 +MIR3913-2 chr12 69978502 69978603 +CCT2 chr12 69979114 69995350 +RAB3IP chr12 70132461 70216984 +CNOT2 chr12 70636774 70748773 +LGR5 chr12 71833550 71980090 +ZFC3H1 chr12 72003252 72061505 +THAP2 chr12 72056789 72074419 +TMEM19 chr12 72079867 72097836 +RAB21 chr12 72148654 72184699 +TBC1D15 chr12 72233487 72320629 +ATXN7L3B chr12 74931551 74935223 +GLIPR1L2 
chr12 75784850 75826468 +GLIPR1 chr12 75874460 75897633 +KRR1 chr12 75890684 75905416 +PHLDA1 chr12 76419227 76427712 +NAP1L1 chr12 76438670 76478813 +BBS10 chr12 76738254 76742222 +OSBPL8 chr12 76745577 76953589 +ZDHHC17 chr12 77157368 77247476 +PPP1R12A chr12 80167343 80329240 +PTPRQ chr12 80799774 81072802 +PPFIA2 chr12 81652045 82153332 +CCDC59 chr12 82617460 82752584 +METTL25 chr12 82752276 82873015 +C12orf50 chr12 88373816 88427814 +C12orf29 chr12 88427623 88443937 +TMTC3 chr12 88536073 88593664 +DUSP6 chr12 89741009 89747048 +POC1B chr12 89813495 89919801 +ATP2B1 chr12 89981828 90103077 +CCER1 chr12 91299399 91348953 +BTG1 chr12 92536286 92539673 +PLEKHG7 chr12 93115281 93166231 +EEA1 chr12 93164413 93323107 +UBE2N chr12 93799449 93836038 +MRPL42 chr12 93861264 93897545 +CRADD chr12 94071151 94288616 +CCDC41 chr12 94700225 94853764 +MIR5700 chr12 94955565 94955635 +TMCC3 chr12 94960900 95044338 +NDUFA12 chr12 95290831 95397546 +NR2C1 chr12 95414005 95467479 +VEZT chr12 95611522 95696566 +METAP2 chr12 95867296 95909615 +SNRPF chr12 96252706 96297606 +LTA4H chr12 96394606 96437298 +CDK17 chr12 96672039 96794338 +TMPO chr12 98909290 98944157 +SLC25A3 chr12 98987369 98995946 +SNORA53 chr12 98993413 98993661 +APAF1 chr12 99038919 99129204 +ANKS1B chr12 99120235 100378432 +UHRF1BP1L chr12 100422233 100536626 +ACTR6 chr12 100592900 100635643 +SCYL2 chr12 100660918 100735502 +NR1H4 chr12 100867486 100958191 +ANO4 chr12 101111304 101522419 +ARL1 chr12 101786898 101801598 +CHPT1 chr12 102090725 102137918 +GNPTAB chr12 102139275 102224716 +DRAM1 chr12 102271129 102405908 +CCDC53 chr12 102406705 102455927 +NUP37 chr12 102467967 102513902 +ASCL1 chr12 103351464 103354294 +C12orf42 chr12 103631369 103889749 +NT5DC3 chr12 104164231 104234975 +HSP90B1 chr12 104323885 104347423 +C12orf73 chr12 104343980 104359486 +TDG chr12 104359582 104382652 +HCFC2 chr12 104458235 104498639 +NFYB chr12 104510855 104532067 +TXNRD1 chr12 104609557 104744061 +CHST11 chr12 104849073 105155792 
+SLC41A2 chr12 105196331 105352522 +C12orf45 chr12 105380088 105443515 +ALDH1L2 chr12 105413568 105478355 +KIAA1033 chr12 105501102 105562912 +APPL2 chr12 105567074 105630016 +C12orf75 chr12 105629068 105789875 +NUAK1 chr12 106457118 106533811 +CKAP4 chr12 106631655 106698057 +TCP11L2 chr12 106695707 106740793 +POLR3B chr12 106751436 106903976 +RIC8B chr12 107168373 107283090 +C12orf23 chr12 107349497 107372556 +MTERFD3 chr12 107371069 107380944 +CRY1 chr12 107385142 107487607 +PWP1 chr12 108079509 108106944 +CMKLR1 chr12 108681821 108733118 +FICD chr12 108908962 108919614 +SART3 chr12 108916357 108955176 +ISCU chr12 108956358 108963160 +TMEM119 chr12 108983622 108992096 +SELPLG chr12 109015686 109027735 +CORO1C chr12 109038885 109125372 +SSH1 chr12 109176466 109251366 +SVOP chr12 109304658 109459045 +USP30 chr12 109460894 109525831 +ALKBH2 chr12 109525996 109531436 +UNG chr12 109535379 109548797 +MYO1H chr12 109785708 109893328 +KCTD10 chr12 109886461 109915349 +UBE3B chr12 109915207 109974507 +MMAB chr12 109991542 110011679 +MVK chr12 110011060 110035067 +FAM222A chr12 110152033 110208312 +GLTP chr12 110288748 110318293 +TCHP chr12 110338069 110421646 +GIT2 chr12 110367607 110434194 +ANKRD13A chr12 110436991 110477568 +C12orf76 chr12 110465872 110511491 +IFT81 chr12 110562140 110656602 +ATP2A2 chr12 110718561 110788898 +ANAPC7 chr12 110810705 110841535 +ARPC3 chr12 110872630 110888227 +GPN3 chr12 110890289 110907073 +FAM216A chr12 110906169 110928190 +VPS29 chr12 110928902 110939922 +TCTN1 chr12 111051832 111087235 +PPP1CC chr12 111157485 111180744 +SH2B3 chr12 111843752 111889427 +ATXN2 chr12 111890018 112037480 +BRAP chr12 112079950 112123790 +ACAD10 chr12 112123857 112194903 +ALDH2 chr12 112204691 112247782 +MAPKAPK5-AS1 chr12 112277571 112280706 +MAPKAPK5 chr12 112279782 112334343 +TMEM116 chr12 112369086 112450970 +ERP29 chr12 112451120 112461255 +NAA25 chr12 112464500 112546826 +TRAFD1 chr12 112563305 112591407 +RPL6 chr12 112842994 112856642 +PTPN11 chr12 
112856155 112947717 +RPH3A chr12 113008184 113336686 +OAS1 chr12 113344582 113369990 +OAS3 chr12 113376157 113411054 +RASAL1 chr12 113536624 113574044 +DDX54 chr12 113594979 113623284 +C12orf52 chr12 113623331 113630173 +TPCN1 chr12 113658855 113736390 +PLBD2 chr12 113796371 113827203 +SDSL chr12 113860042 113876081 +RBM19 chr12 114254543 114404176 +MED13L chr12 116395711 116715143 +LINC00173 chr12 116971227 116974323 +MAP1LC3B2 chr12 116997186 117014425 +C12orf49 chr12 117153593 117175875 +RNFT2 chr12 117176096 117291436 +FBXW8 chr12 117348761 117468953 +FBXO21 chr12 117581146 117628336 +RFC5 chr12 118451393 118470935 +WSB2 chr12 118470712 118500235 +VSIG10 chr12 118501398 118573831 +PEBP1 chr12 118573663 118583389 +TAOK3 chr12 118587606 118810750 +SUDS3 chr12 118814185 118855840 +PRKAB1 chr12 120105558 120119435 +CIT chr12 120123595 120315095 +GCN1L1 chr12 120565007 120632513 +RPLP0 chr12 120634489 120639038 +PXN-AS1 chr12 120639094 120650941 +PXN chr12 120648250 120703574 +PLA2G1B chr12 120759914 120765592 +MSI1 chr12 120779133 120806983 +COX6A1 chr12 120875893 120878545 +TRIAP1 chr12 120881764 120884215 +GATC chr12 120884241 120899389 +SRSF9 chr12 120899471 120907596 +DYNLL1 chr12 120907653 120936296 +COQ5 chr12 120941077 120972237 +RNF10 chr12 120971283 121015397 +POP5 chr12 121016567 121019201 +CABP1 chr12 121078355 121105127 +MLEC chr12 121124672 121139667 +SPPL3 chr12 121200313 121342174 +C12orf43 chr12 121440225 121454305 +P2RX7 chr12 121570622 121623876 +P2RX4 chr12 121647660 121671909 +CAMKK2 chr12 121675497 121736111 +ANAPC5 chr12 121746048 121837699 +RNF34 chr12 121837844 121868389 +KDM2B chr12 121866900 122018920 +SETD1B chr12 122242086 122270562 +PSMD9 chr12 122326637 122356203 +BCL7A chr12 122457328 122499948 +MLXIP chr12 122516628 122631894 +DIABLO chr12 122692210 122712081 +VPS33A chr12 122714111 122751068 +CLIP1 chr12 122755979 122907179 +ZCCHC8 chr12 122957417 122985518 +RSRC2 chr12 122989190 123011547 +DENR chr12 123237321 123255611 +HIP1R 
chr12 123319000 123347507 +VPS37B chr12 123349882 123380991 +OGFOD2 chr12 123459127 123464590 +ARL6IP4 chr12 123464607 123467456 +MPHOSPH9 chr12 123636867 123728561 +C12orf65 chr12 123717463 123742506 +CDK2AP1 chr12 123745528 123756881 +SBNO1 chr12 123773656 123849390 +RILPL2 chr12 123899936 123921264 +SNRNP35 chr12 123942188 123957701 +TMED2 chr12 124069078 124083116 +DDX55 chr12 124086624 124105488 +EIF2B1 chr12 124104953 124118313 +GTF2H3 chr12 124118375 124147153 +ZNF664 chr12 124456392 124499986 +NCOR2 chr12 124808961 125052135 +SCARB1 chr12 125261402 125367214 +UBC chr12 125396150 125401914 +MIR5188 chr12 125400093 125400205 +AACS chr12 125549925 125627873 +LINC00507 chr12 128399917 128436097 +SLC15A4 chr12 129277739 129308528 +TMEM132D chr12 129556270 130388211 +STX2 chr12 131274145 131323811 +RAN chr12 131356424 131362223 +SFSWAP chr12 132195626 132284282 +ULK1 chr12 132379196 132407712 +PUS1 chr12 132413745 132428406 +EP400 chr12 132434508 132565005 +DDX51 chr12 132621139 132628880 +NOC4L chr12 132628993 132637013 +POLE chr12 133200348 133263951 +PXMP2 chr12 133264192 133297276 +PGAM5 chr12 133287405 133299228 +ANKLE2 chr12 133302254 133338474 +GOLGA3 chr12 133345495 133405444 +CHFR chr12 133398773 133532890 +ZNF605 chr12 133498019 133532892 +ZNF26 chr12 133562951 133589154 +ZNF84 chr12 133613878 133639885 +ZNF140 chr12 133656424 133684130 +ZNF10 chr12 133707161 133736051 +ZNF268 chr12 133707570 133783698 +ANKRD20A9P chr13 19390858 19446107 +MPHOSPH8 chr13 20207788 20247599 +PSPC1 chr13 20248896 20357142 +ZMYM5 chr13 20397622 20437776 +ZMYM2 chr13 20532810 20665968 +CRYL1 chr13 20977806 21099996 +IFT88 chr13 21140585 21265503 +IL17D chr13 21276266 21297237 +N6AMT2 chr13 21302870 21348097 +XPO4 chr13 21351469 21477187 +SAP18 chr13 21714653 21723221 +MRP63 chr13 21750784 21753223 +ZDHHC20 chr13 21950263 22033509 +LINC00424 chr13 22446909 22449705 +SACS chr13 23902965 24007841 +SPATA13 chr13 24553944 24881212 +PARP4 chr13 24995064 25086948 +PABPC3 chr13 
25670300 25673389 +AMER2 chr13 25735822 25746426 +MTMR6 chr13 25802307 25862147 +NUPL1 chr13 25875662 25923938 +RNF6 chr13 26706253 26796791 +WASF3 chr13 27131840 27263085 +USP12 chr13 27640293 27746033 +RPL21 chr13 27825446 27830828 +GTF3A chr13 27998681 28009958 +MTIF3 chr13 28009776 28024739 +POLR1D chr13 28194903 28241548 +ATP5EP2 chr13 28519343 28519727 +PAN3 chr13 28712643 28869475 +POMP chr13 29233241 29253062 +SLC46A3 chr13 29274201 29293107 +SLC7A1 chr13 30083547 30169825 +UBL3 chr13 30338508 30424821 +HMGB1 chr13 31032884 31191734 +USPL1 chr13 31191830 31233686 +ALOX5AP chr13 31309645 31338556 +TEX26-AS1 chr13 31456070 31507983 +HSPH1 chr13 31710762 31736525 +FRY chr13 32605437 32870794 +N4BP2L1 chr13 32974861 33002315 +N4BP2L2 chr13 33006554 33112970 +PDS5B chr13 33160564 33352157 +RFC3 chr13 34392186 34540695 +NBEA chr13 35516424 36247159 +MAB21L1 chr13 36047926 36050832 +SPG20 chr13 36875775 36944317 +SMAD9 chr13 37418968 37494902 +ALG5 chr13 37523912 37574398 +EXOSC8 chr13 37572953 37583750 +SUPT20H chr13 37583449 37633850 +UFM1 chr13 38923986 38937140 +FREM2 chr13 39261266 39460074 +PROSER1 chr13 39584003 39612252 +NHLRC3 chr13 39612443 39624246 +LHFP chr13 39917029 40177665 +COG6 chr13 40229764 40365802 +MIR320D1 chr13 41301964 41302011 +MRPS31 chr13 41303432 41345309 +ELF1 chr13 41506056 41635576 +WBP4 chr13 41635410 41658137 +KBTBD6 chr13 41701705 41706882 +KBTBD7 chr13 41763969 41768702 +MTRF1 chr13 41790505 41837742 +NAA16 chr13 41885341 41951166 +RGCC chr13 42031695 42045018 +AKAP11 chr13 42846289 42897396 +EPSTI1 chr13 43460524 43566407 +DNAJC15 chr13 43597339 43683045 +ENOX1 chr13 43787654 44361044 +SERP2 chr13 44947801 44971850 +TSC22D1 chr13 45007655 45151283 +NUFIP1 chr13 45513384 45563618 +GTF2F2 chr13 45694650 45858237 +TPT1 chr13 45911008 45915505 +TPT1-AS1 chr13 45915480 45965872 +COG3 chr13 46039060 46110765 +ZC3H13 chr13 46528600 46626894 +CPB2-AS1 chr13 46626941 46687467 +LCP1 chr13 46700055 46786006 +ESD chr13 47345391 47371367 
+SUCLA2 chr13 48510622 48612125 +NUDT15 chr13 48611703 48621358 +MED4 chr13 48627459 48669267 +ITM2B chr13 48807294 48837063 +RB1 chr13 48877887 49056122 +LPAR6 chr13 48963707 49018840 +RCBTB2 chr13 49063095 49107369 +FNDC3A chr13 49550048 49783915 +CAB39L chr13 49882786 50018262 +RCBTB1 chr13 50106082 50159719 +KPNA3 chr13 50273447 50367057 +CTAGE10P chr13 50464727 50467134 +SPRYD7 chr13 50486842 50510626 +TRIM13 chr13 50570024 50594617 +DLEU2 chr13 50601269 50699856 +DLEU1 chr13 50656307 51297372 +DLEU7-AS1 chr13 51381992 51423190 +RNASEH2B chr13 51483814 51544592 +INTS6 chr13 51928213 52028400 +WDFY2 chr13 52158644 52336171 +DHRS12 chr13 52342129 52378293 +ALG11 chr13 52586534 52603800 +UTP14C chr13 52598827 52607736 +NEK5 chr13 52611093 52703214 +NEK3 chr13 52706775 52733996 +MRPS31P5 chr13 52741844 52768600 +VPS36 chr13 52986737 53024763 +CKAP2 chr13 53029564 53050763 +HNRNPA1L2 chr13 53191605 53217919 +SUGT1 chr13 53226844 53275044 +PRR20A chr13 57715052 57718073 +PCDH17 chr13 58205944 58303445 +TDRD3 chr13 60970591 61148012 +MIR3169 chr13 61773932 61774014 +PCDH9 chr13 66876967 67804468 +SNORD37 chr13 73028040 73028105 +MZT1 chr13 73282495 73301825 +DIS3 chr13 73329540 73356234 +PIBF1 chr13 73356197 73590591 +KLF12 chr13 74260226 74708394 +CTAGE11P chr13 75812080 75814432 +COMMD6 chr13 76099350 76123575 +UCHL3 chr13 76123619 76180085 +KCTD12 chr13 77454312 77460540 +CLN5 chr13 77564795 77576652 +FBXL3 chr13 77566740 77601330 +MYCBP2 chr13 77618792 77901185 +SLAIN1 chr13 78272023 78338377 +EDNRB chr13 78469616 78493903 +RNF219 chr13 79188426 79233314 +LINC00331 chr13 79361454 79366442 +RBM26 chr13 79885962 79980612 +NDFIP2 chr13 80055287 80130210 +SPRY2 chr13 80910111 80915086 +SLITRK1 chr13 84451344 84456528 +MIR4500HG chr13 88079469 88323514 +GPC5 chr13 92050929 93519490 +SNORD22 chr13 94021100 94021213 +TGDS chr13 95226308 95248511 +ABCC4 chr13 95672083 95953687 +DZIP1 chr13 96230457 96296957 +DNAJC3 chr13 96329393 96447243 +UGGT2 chr13 96453834 96705736 
+MBNL2 chr13 97873688 98046374 +RAP2A chr13 98086476 98121382 +IPO5 chr13 98605912 98676551 +DOCK9 chr13 99445741 99738879 +UBAC2 chr13 99853028 100038688 +GPR18 chr13 99906968 99913998 +GPR183 chr13 99946784 99959659 +TM9SF2 chr13 100153671 100216260 +PCCA chr13 100741269 101182686 +GGACT chr13 101183810 101241782 +TMTC4 chr13 101256181 101327347 +NALCN chr13 101706130 102068843 +FGF14 chr13 102372134 103054124 +TPP2 chr13 103249353 103331521 +TEX30 chr13 103418340 103426161 +KDELC1 chr13 103436631 103451357 +BIVM chr13 103451399 103493885 +ERCC5 chr13 103497194 103528345 +DAOA-AS1 chr13 106111404 106158030 +ARGLU1 chr13 107194021 107220512 +LIG4 chr13 108859787 108870716 +ABHD13 chr13 108870727 108886603 +MYO16 chr13 109248500 109860355 +IRS2 chr13 110406184 110438915 +RAB20 chr13 111175417 111214080 +CARKD chr13 111267881 111292340 +ING1 chr13 111365083 111373421 +ANKRD10 chr13 111530887 111567416 +ARHGEF7 chr13 111766906 111958084 +TUBGCP3 chr13 113139325 113242481 +ATP11A chr13 113344643 113541482 +PCID2 chr13 113831891 113863029 +CUL4A chr13 113862552 113919399 +LAMP1 chr13 113951556 113977987 +DCUN1D2 chr13 114110134 114145267 +TMCO3 chr13 114145310 114204542 +TFDP1 chr13 114239013 114295785 +GAS6-AS1 chr13 114518603 114542321 +GAS6 chr13 114523522 114567046 +RASA3 chr13 114747194 114898086 +CDC16 chr13 115000362 115038198 +UPF3A chr13 115047059 115071283 +CHAMP1 chr13 115079988 115092796 +POTEM chr14 19983559 20020272 +OR4K13 chr14 20502003 20502919 +TTC5 chr14 20724717 20774153 +CCNB1IP1 chr14 20779527 20801471 +SNORD126 chr14 20794609 20794685 +RPPH1 chr14 20811207 20811844 +PARP2 chr14 20811741 20826064 +OSGEP chr14 20914570 20923264 +APEX1 chr14 20923350 20925927 +TMEM55B chr14 20925878 20929771 +PNP chr14 20937113 20945253 +RNASE9 chr14 21024252 21029090 +RNASE6 chr14 21249210 21250626 +RNASE1 chr14 21269387 21271437 +RNASE3 chr14 21359558 21360507 +METTL17 chr14 21457929 21465189 +NDRG2 chr14 21484922 21539031 +ARHGEF40 chr14 21538429 21558399 +ZNF219 
chr14 21558205 21572881 +HNRNPC chr14 21677295 21737653 +SUPT16H chr14 21819631 21852425 +CHD8 chr14 21853353 21924285 +RAB2B chr14 21927179 21945132 +TOX4 chr14 21944756 21967319 +METTL3 chr14 21966277 21979517 +SALL2 chr14 21989232 22005350 +DAD1 chr14 23033805 23058175 +ABHD4 chr14 23067146 23081265 +OXA1L chr14 23235731 23241007 +MRPL52 chr14 23299088 23304246 +LRP10 chr14 23340822 23350789 +RBM23 chr14 23369854 23388393 +PRMT5 chr14 23389720 23398794 +HAUS4 chr14 23415437 23426370 +AJUBA chr14 23440383 23451851 +PSMB5 chr14 23485752 23504439 +ACIN1 chr14 23527773 23564823 +C14orf119 chr14 23563974 23569665 +PPP1R3E chr14 23764852 23772057 +BCL2L2-PABPN1 chr14 23776044 23794578 +SLC22A17 chr14 23815515 23822121 +EFS chr14 23825611 23834961 +CMTM5 chr14 23846017 23848981 +MYH7 chr14 23881947 23904927 +NGDN chr14 23938897 23979071 +THTPA chr14 24025216 24029480 +AP1G2 chr14 24028774 24037279 +JPH4 chr14 24037244 24048009 +DHRS4-AS1 chr14 24407940 24409196 +DHRS4 chr14 24422795 24438488 +DHRS4L2 chr14 24439148 24475617 +DCAF11 chr14 24583404 24594451 +PSME1 chr14 24605367 24608176 +EMC9 chr14 24608174 24610797 +PSME2 chr14 24612574 24616779 +RNF31 chr14 24615892 24629870 +IRF9 chr14 24630262 24635774 +REC8 chr14 24641062 24649463 +IPO4 chr14 24649425 24658170 +TM9SF1 chr14 24658363 24682679 +CHMP4A chr14 24678789 24683075 +MDP1 chr14 24683143 24685276 +NEDD8 chr14 24686058 24701660 +GMPR2 chr14 24701628 24708448 +TINF2 chr14 24708849 24711880 +RABGGTA chr14 24734744 24740945 +DHRS1 chr14 24759804 24769039 +RIPK3 chr14 24805227 24809251 +SDR39U1 chr14 24908972 24912111 +STXBP6 chr14 25278862 25519503 +NOVA1 chr14 26912299 27066960 +C14orf23 chr14 29241910 29282493 +G2E3 chr14 31028329 31089269 +SCFD1 chr14 31091318 31205018 +STRN3 chr14 31363005 31495607 +AP4S1 chr14 31494312 31562818 +HECTD1 chr14 31569318 31677010 +HEATR5A chr14 31760994 31889788 +DTD2 chr14 31915242 31926716 +NUBPL chr14 31959162 32330430 +ARHGAP5 chr14 32545320 32628934 +AKAP6 chr14 32798479 
33300567 +NPAS3 chr14 33404139 34273382 +EGLN3 chr14 34393437 34931980 +SPTSSA chr14 34901995 34931562 +EAPP chr14 34985135 35008916 +SNX6 chr14 35030300 35099389 +CFL2 chr14 35179593 35184029 +BAZ1A chr14 35221937 35344853 +SRP54 chr14 35451163 35498773 +FAM177A1 chr14 35514113 35582336 +PPP2R3C chr14 35554673 35591723 +KIAA0391 chr14 35591052 35743271 +PSMA6 chr14 35747839 35786699 +NFKBIA chr14 35870717 35873955 +RALGAPA1 chr14 36007558 36278510 +BRMS1L chr14 36295524 36401531 +MBIP chr14 36767770 36789882 +CLEC14A chr14 38723308 38725574 +LINC00639 chr14 39218545 39417477 +SEC23A chr14 39501123 39578850 +GEMIN2 chr14 39583427 39606177 +TRAPPC6B chr14 39617015 39639736 +PNN chr14 39644387 39652422 +CTAGE5 chr14 39734488 39856156 +FSCB chr14 44973545 44976482 +KLHL28 chr14 45393522 45511525 +FAM179B chr14 45431411 45543634 +PRPF39 chr14 45553302 45585485 +FKBP3 chr14 45584803 45604522 +MIS18BP1 chr14 45672393 45722743 +MDGA2 chr14 47311134 48143999 +RPS29 chr14 50043390 50065408 +RPL36AL chr14 50085237 50087403 +MGAT2 chr14 50087489 50090198 +DNAAF2 chr14 50091892 50101948 +KLHDC1 chr14 50159823 50219870 +KLHDC2 chr14 50234326 50249909 +NEMF chr14 50249997 50319921 +ARF6 chr14 50359810 50361490 +SOS2 chr14 50583847 50698276 +L2HGDH chr14 50704281 50779266 +ATP5S chr14 50779044 50802276 +MAP4K5 chr14 50885219 51027844 +ATL1 chr14 50999227 51099786 +NIN chr14 51186481 51297839 +PYGL chr14 51324609 51411454 +TRIM9 chr14 51441980 51562779 +TMX1 chr14 51706880 51722759 +GNG2 chr14 52292913 52446060 +C14orf166 chr14 52456193 52471420 +TXNDC16 chr14 52897308 53019240 +PSMC6 chr14 53173890 53195305 +STYX chr14 53196898 53241716 +GNPNAT1 chr14 53241912 53258386 +FERMT2 chr14 53323986 53419153 +DDHD1 chr14 53510686 53620000 +GMFB chr14 54941202 54955914 +CGRRF1 chr14 54976530 55005567 +MIR4308 chr14 55344831 55344911 +SOCS4 chr14 55493948 55516206 +MAPK1IP1L chr14 55518349 55536910 +FBXO34 chr14 55738021 55828636 +ATG14 chr14 55833110 55878576 +KTN1 chr14 56025790 56168244 
+PELI2 chr14 56584532 56768244 +EXOC5 chr14 57670518 57735726 +AP5M1 chr14 57735627 57756797 +SLC35F4 chr14 58030640 58448912 +C14orf37 chr14 58466453 58764857 +ACTR10 chr14 58666798 58701750 +PSMA3 chr14 58711549 58738730 +ARID4A chr14 58765103 58840605 +TIMM9 chr14 58875212 58894332 +KIAA0586 chr14 58894103 59015216 +GPR135 chr14 59895740 59932060 +L3HYPDH chr14 59927081 59951148 +JKAMP chr14 59951161 59972128 +RTN1 chr14 60062694 60337684 +PCNXL4 chr14 60558629 60635851 +DHRS7 chr14 60610838 60636574 +PPM1A chr14 60712470 60765805 +C14orf39 chr14 60863187 60982261 +MNAT1 chr14 61201460 61436671 +TRMT5 chr14 61438169 61448076 +SLC38A6 chr14 61447832 61550451 +HIF1A chr14 62162231 62214976 +HIF1A-AS2 chr14 62182276 62217815 +SNAPC1 chr14 62229075 62263146 +LINC00643 chr14 62584197 62596352 +PPP2R5E chr14 63838075 64010092 +WDR89 chr14 64063757 64108579 +SYNE2 chr14 64319683 64693165 +MTHFD1 chr14 64854749 64926722 +ZBTB25 chr14 64915824 64971931 +ZBTB1 chr14 64970430 65000408 +HSPA2 chr14 65002623 65009955 +PLEKHG3 chr14 65170820 65213610 +SPTB chr14 65213002 65346601 +CHURC1 chr14 65381079 65411309 +FNTB chr14 65381203 65529368 +CHURC1-FNTB chr14 65381203 65528521 +MAX chr14 65472892 65569413 +FUT8 chr14 65877310 66210839 +LINC00238 chr14 66953072 66965271 +GPHN chr14 66974125 67648520 +MPP5 chr14 67707826 67802536 +ATP6V1D chr14 67761088 67826982 +EIF2S1 chr14 67826714 67853233 +PLEKHH1 chr14 68000018 68056329 +PIGH chr14 68048672 68067004 +ARG2 chr14 68086515 68118437 +VTI1B chr14 68113792 68141548 +RDH11 chr14 68143518 68162531 +ZFYVE26 chr14 68194091 68283307 +ZFP36L1 chr14 69254377 69263190 +DCAF5 chr14 69517598 69619867 +EXD2 chr14 69658228 69709075 +ERH chr14 69846848 69865344 +SLC39A9 chr14 69864732 69929105 +KIAA0247 chr14 70078313 70181859 +SRSF5 chr14 70193617 70238722 +SMOC1 chr14 70320848 70499083 +COX16 chr14 70791798 70826448 +SYNJ2BP chr14 70838148 70883778 +MED6 chr14 71047974 71067384 +TTC9 chr14 71108504 71142077 +PCNX chr14 71374122 71582099 
+DPF3 chr14 73086004 73360809 +DCAF4 chr14 73393040 73426411 +ZFYVE1 chr14 73436159 73493920 +RBM25 chr14 73525144 73588122 +PSEN1 chr14 73603126 73690399 +PAPLN chr14 73704205 73741348 +NUMB chr14 73741815 73930348 +ACOT2 chr14 74034324 74042357 +PNMA1 chr14 74178494 74181128 +ELMSAN1 chr14 74181825 74256988 +PTGR2 chr14 74318547 74353530 +ZNF410 chr14 74353320 74399214 +FAM161B chr14 74398204 74417117 +COQ6 chr14 74416629 74430373 +ALDH6A1 chr14 74523553 74551196 +LIN52 chr14 74551499 74667936 +ABCD4 chr14 74752126 74769759 +NPC2 chr14 74942895 74960880 +ISCA2 chr14 74960423 74963809 +FCF1 chr14 75179847 75203394 +YLPM1 chr14 75230069 75322244 +DLST chr14 75348594 75370448 +EIF2B2 chr14 75469614 75476292 +MLH3 chr14 75480467 75518235 +ACYP1 chr14 75519924 75536186 +NEK9 chr14 75548822 75594047 +TMED10 chr14 75598173 75643334 +FOS chr14 75745477 75748933 +TTLL5 chr14 76099968 76421421 +C14orf1 chr14 76116134 76127532 +IFT43 chr14 76368479 76550928 +TGFB3 chr14 76424442 76449334 +GPATCH2L chr14 76618259 76720685 +ESRRB chr14 76776957 76968178 +VASH1 chr14 77228532 77249354 +ANGEL1 chr14 77253588 77292589 +KIAA1737 chr14 77564440 77583630 +ZDHHC22 chr14 77597613 77609077 +POMT2 chr14 77741299 77787227 +GSTZ1 chr14 77787227 77797940 +TMED8 chr14 77801364 77843452 +VIPAS39 chr14 77893018 77924295 +AHSA1 chr14 77924213 77935817 +SPTLC2 chr14 77972340 78083116 +ALKBH1 chr14 78138747 78174363 +SLIRP chr14 78174414 78227447 +SNW1 chr14 78183942 78227550 +ADCK1 chr14 78266426 78401355 +NRXN3 chr14 78708734 80330762 +DIO2 chr14 80663873 80854100 +GTF2A1 chr14 81641796 81687721 +SEL1L chr14 81937893 82000205 +GALC chr14 88304164 88460009 +SPATA7 chr14 88851268 88936694 +ZC3H14 chr14 89029253 89079853 +EML5 chr14 89078775 89259096 +TTC8 chr14 89290497 89344335 +FOXN3 chr14 89591215 90085493 +EFCAB11 chr14 90261013 90421121 +PSMC1 chr14 90722839 90738968 +NRDE2 chr14 90742580 90798481 +CALM1 chr14 90862846 90874605 +RPS6KA5 chr14 91336799 91526980 +C14orf159 chr14 91526677 
91691976 +SMEK1 chr14 91923955 91976898 +TC2N chr14 92246095 92333880 +TRIP11 chr14 92432335 92507240 +NDUFB1 chr14 92582466 92588261 +CPSF2 chr14 92588281 92630755 +LGMN chr14 93170152 93215047 +GOLGA5 chr14 93260576 93306308 +CHGA chr14 93389425 93401638 +ITPK1 chr14 93403259 93582665 +MOAP1 chr14 93648541 93651273 +TMEM251 chr14 93651296 93653434 +C14orf142 chr14 93669239 93673439 +UBR7 chr14 93673401 93695561 +UNC79 chr14 93799565 94174222 +PRIMA1 chr14 94184644 94254827 +DDX24 chr14 94517266 94547591 +IFI27L1 chr14 94547628 94570192 +IFI27L2 chr14 94594116 94596590 +PPP4R4 chr14 94612465 94746072 +SERPINA13P chr14 95107062 95113331 +DICER1 chr14 95552565 95624347 +DICER1-AS1 chr14 95623982 95646263 +SNHG10 chr14 95998634 96001209 +GLRX5 chr14 95999840 96011061 +C14orf132 chr14 96505661 96560417 +ATG2B chr14 96747595 96830207 +GSKIP chr14 96829814 96853625 +AK7 chr14 96858448 96955764 +PAPOLA chr14 96967770 97033448 +VRK1 chr14 97263641 97398059 +SETD3 chr14 99864083 99947216 +CCNK chr14 99947506 100001381 +CYP46A1 chr14 100150641 100193638 +EML1 chr14 100204030 100408397 +EVL chr14 100437786 100610573 +YY1 chr14 100704635 100749129 +SLC25A29 chr14 100757448 100772884 +WARS chr14 100800125 100843142 +WDR25 chr14 100842755 100996640 +BEGAIN chr14 101003486 101053750 +MEG3 chr14 101245747 101327368 +MIR431 chr14 101347344 101347457 +SNORD113-6 chr14 101405893 101405968 +SNORD114-9 chr14 101432366 101432437 +SNORD114-19 chr14 101442814 101442888 +SNORD114-29 chr14 101456428 101456497 +MIR329-1 chr14 101493122 101493201 +MIR1185-1 chr14 101509314 101509399 +MIR485 chr14 101521738 101521848 +MIR656 chr14 101533061 101533138 +PPP2R5C chr14 102228135 102394326 +DYNC1H1 chr14 102430865 102517129 +HSP90AA1 chr14 102547075 102606036 +WDR20 chr14 102605840 102691184 +CINP chr14 102808956 102829253 +TECPR2 chr14 102829300 102968818 +CDC42BPB chr14 103398716 103523799 +LINC00605 chr14 103653558 103655365 +EIF5 chr14 103799881 103811362 +MARK3 chr14 103851729 103970168 +CKB 
chr14 103985996 103989448 +TRMT61A chr14 103995521 104003410 +BAG5 chr14 104022881 104029168 +KLC1 chr14 104028233 104167888 +APOPT1 chr14 104029299 104073860 +ZFYVE21 chr14 104182067 104200005 +C14orf2 chr14 104378625 104394606 +TMEM179 chr14 104941015 105071984 +INF2 chr14 105155943 105185942 +SIVA1 chr14 105219437 105234831 +AKT1 chr14 105235686 105262088 +PLD4 chr14 105391153 105399574 +CDCA4 chr14 105475910 105487485 +BRF1 chr14 105675623 105781926 +PACS2 chr14 105766900 105864484 +TEX22 chr14 105864916 105916443 +CRIP2 chr14 105939299 105946499 +C14orf80 chr14 105956192 105965912 +LINC00221 chr14 106938445 106977000 +OR4M2 chr15 22368478 22369561 +TUBGCP5 chr15 22833395 22873892 +CYFIP1 chr15 22892005 23006016 +NIPA2 chr15 23004684 23034427 +WHAMMP3 chr15 23187728 23208737 +HERC2P2 chr15 23282281 23378228 +HERC2P7 chr15 23388289 23392657 +MKRN3 chr15 23810454 23873064 +NDN chr15 23930565 23932450 +PWRN2 chr15 24407901 24415095 +SNRPN chr15 25068794 25223870 +SNURF chr15 25200181 25245423 +SNORD116-5 chr15 25307480 25307574 +SNORD116-15 chr15 25326434 25326525 +SNORD116-27 chr15 25346722 25346813 +SNORD115-10 chr15 25432683 25432763 +SNORD115-21 chr15 25453230 25453310 +SNORD115-32 chr15 25474114 25474195 +SNORD115-42 chr15 25492492 25492573 +UBE3A chr15 25582381 25684128 +GABRB3 chr15 26788693 27184686 +HERC2 chr15 28356186 28567298 +HERC2P9 chr15 28834638 28930410 +WHAMMP2 chr15 28982729 29002537 +APBA2 chr15 29129629 29410518 +TJP1 chr15 29991571 30261068 +ARHGAP11B chr15 30916697 31065196 +FAN1 chr15 31196055 31235311 +MTMR10 chr15 31231144 31283810 +ARHGAP11A chr15 32907345 32932150 +SCG5 chr15 32933877 32989299 +EMC7 chr15 34376218 34394149 +PGBD4 chr15 34394274 34396591 +KATNBL1 chr15 34432875 34502297 +EMC4 chr15 34517200 34522357 +SLC12A6 chr15 34525460 34630261 +NOP10 chr15 34633917 34635378 +GOLGA8A chr15 34671269 34880704 +GOLGA8B chr15 34817408 34880704 +GJD2 chr15 35043233 35047166 +AQR chr15 35147732 35262040 +ZNF770 chr15 35270542 35280488 
+C15orf41 chr15 36871812 37102449 +MEIS2 chr15 37181406 37393504 +TMCO5A chr15 38214140 38259925 +SPRED1 chr15 38544527 38649450 +FAM98B chr15 38746328 38779911 +EIF2AK4 chr15 40226347 40327797 +SRP14 chr15 40327940 40331389 +PLCB2 chr15 40570377 40600136 +C15orf52 chr15 40623653 40633168 +KNSTRN chr15 40674922 40686447 +IVD chr15 40697686 40728146 +C15orf57 chr15 40820882 40857256 +RPUSD2 chr15 40861499 40866659 +RMDN3 chr15 41028082 41048049 +DNAJC17 chr15 41060067 41099675 +ZFYVE19 chr15 41099284 41106767 +VPS18 chr15 41186628 41196173 +EXD1 chr15 41474923 41522941 +CHP1 chr15 41523037 41574043 +OIP5-AS1 chr15 41576188 41601901 +NUSAP1 chr15 41624892 41673248 +NDUFAF1 chr15 41679551 41694717 +RTF1 chr15 41700606 41775761 +RPAP1 chr15 41809374 41836467 +TYRO3 chr15 41849873 41871536 +MGA chr15 41913422 42062141 +MAPKBP1 chr15 42066632 42120053 +JMJD7 chr15 42120283 42129779 +PLA2G4B chr15 42120283 42140345 +EHD4 chr15 42190950 42264776 +PLA2G4E chr15 42273780 42343388 +VPS39 chr15 42450899 42500514 +TMEM87A chr15 42502730 42565861 +GANC chr15 42565431 42645864 +CAPN3 chr15 42640301 42704516 +SNAP23 chr15 42783431 42837547 +LRRC57 chr15 42834720 42841000 +HAUS2 chr15 42841008 42862192 +UBR1 chr15 43235095 43398311 +TMEM62 chr15 43415477 43477344 +CCNDBP1 chr15 43477316 43487396 +LCMT2 chr15 43619974 43622803 +ADAL chr15 43622872 43646096 +ZSCAN29 chr15 43650370 43663223 +TUBGCP4 chr15 43661419 43699293 +TP53BP1 chr15 43699407 43802926 +MAP1A chr15 43803156 43823818 +PPIP5K1 chr15 43825660 43882430 +CATSPER2 chr15 43920701 43960316 +CKMT1A chr15 43985084 43991420 +PDIA3 chr15 44038590 44065477 +SERF2 chr15 44069285 44094787 +HYPK chr15 44088340 44095241 +MFAP1 chr15 44096690 44117000 +PIN4P1 chr15 44168047 44168442 +CASC4 chr15 44580927 44707956 +CTDSPL2 chr15 44719432 44821236 +EIF3J chr15 44829255 44855227 +SPG11 chr15 44854894 44955876 +B2M chr15 45003675 45011075 +SORD chr15 45315302 45369383 +DUOX2 chr15 45384848 45406542 +GATM chr15 45653322 45694525 
+SPATA5L1 chr15 45694529 45713617 +SLC30A4 chr15 45771809 45815005 +BLOC1S6 chr15 45879321 45908197 +SEMA6D chr15 47476298 48066420 +MYEF2 chr15 48431625 48470714 +SLC12A1 chr15 48483861 48596275 +DUT chr15 48623208 48635570 +SHC4 chr15 49115932 49255641 +EID1 chr15 49170083 49172380 +SECISBP2L chr15 49280673 49338760 +COPS2 chr15 49398268 49447858 +GALK2 chr15 49447853 49660066 +MIR4716 chr15 49461267 49461350 +DTWD1 chr15 49913177 49937333 +GABPB1 chr15 50569389 50647605 +GABPB1-AS1 chr15 50647525 50659636 +USP8 chr15 50716577 50793280 +TRPM7 chr15 50844670 50979012 +SPPL2A chr15 50999506 51058005 +GLDN chr15 51633826 51700210 +DMXL2 chr15 51739908 51915030 +SCG3 chr15 51973550 52013223 +LYSMD2 chr15 52015208 52043782 +TMOD2 chr15 52043758 52108565 +LEO1 chr15 52230222 52264003 +MAPK6 chr15 52244303 52358462 +GNB5 chr15 52413117 52483566 +MIR1266 chr15 52569314 52569397 +MYO5A chr15 52599480 52821247 +ARPP19 chr15 52839242 52862080 +FAM214A chr15 52873514 53002014 +RSL24D1 chr15 55473004 55489265 +PIGB chr15 55611158 55647846 +CCPG1 chr15 55632230 55700708 +RFX7 chr15 56379478 56535483 +ZNF280D chr15 56922379 57210769 +TCF12 chr15 57210821 57591479 +POLR2M chr15 57884231 58074960 +ADAM10 chr15 58887403 59042177 +FAM63B chr15 59063391 59154099 +RNF111 chr15 59157374 59389618 +SLTM chr15 59171244 59225852 +GTF2A2 chr15 59930261 59949740 +BNIP2 chr15 59951345 59981733 +NARG2 chr15 60711808 60771359 +VPS13C chr15 62144588 62352672 +TPM1 chr15 63334831 63364114 +RPS27L chr15 63418071 63450220 +RAB8B chr15 63481668 63559981 +APH1B chr15 63568217 63601325 +CA12 chr15 63613577 63674360 +USP3 chr15 63796793 63886839 +HERC1 chr15 63900817 64126141 +FAM96A chr15 64364758 64386217 +SNX1 chr15 64386322 64438289 +SNX22 chr15 64443914 64449680 +PPIB chr15 64448011 64455404 +TRIP4 chr15 64679947 64747502 +OAZ2 chr15 64979772 64995480 +MIR1272 chr15 65054586 65054714 +PLEKHO2 chr15 65134088 65160206 +SPG21 chr15 65255362 65282648 +MTFMT chr15 65294845 65321977 +PDCD7 chr15 
65409717 65426174 +CLPX chr15 65440557 65477680 +PARP16 chr15 65526798 65592956 +DPP8 chr15 65734801 65810042 +PTPLAD1 chr15 65822756 65870687 +VWA9 chr15 65871091 65903627 +SLC24A1 chr15 65903704 65953333 +DENND4A chr15 65950384 66084631 +RAB11A chr15 66018392 66184329 +MEGF11 chr15 66187417 66546085 +DIS3L chr15 66585555 66626236 +TIPIN chr15 66628544 66679084 +MAP2K1 chr15 66679155 66784650 +SNAPC5 chr15 66782473 66790151 +RPL4 chr15 66790355 66816870 +AAGAB chr15 67493371 67547533 +C15orf61 chr15 67813406 67819628 +MAP2K5 chr15 67835047 68099461 +PIAS1 chr15 68346517 68483096 +CLN6 chr15 68499330 68549549 +FEM1B chr15 68570141 68588203 +ITGA11 chr15 68594050 68724501 +CORO2B chr15 68871308 69020145 +ANP32A chr15 69070874 69113236 +GLCE chr15 69452923 69564556 +RPLP1 chr15 69745123 69748255 +TLE3 chr15 70340129 70390515 +LARP6 chr15 71123863 71146498 +LRRC49 chr15 71145578 71342414 +THAP10 chr15 71173681 71185124 +NR2E3 chr15 72084977 72110600 +MYO9A chr15 72114632 72410918 +SENP8 chr15 72406599 72433311 +PKM chr15 72491370 72524164 +PARP6 chr15 72533522 72565340 +HEXA chr15 72635775 72668817 +ARIH1 chr15 72766667 72879692 +BBS4 chr15 72978527 73030817 +ADPGK chr15 73043710 73078187 +NEO1 chr15 73344051 73597547 +NPTN chr15 73852355 73926475 +CD276 chr15 73976307 74006859 +STOML1 chr15 74275547 74286963 +PML chr15 74287014 74340153 +ISLR chr15 74466012 74469213 +SEMA7A chr15 74701630 74726808 +UBL7 chr15 74738318 74753523 +CLK3 chr15 74890841 74932057 +EDC3 chr15 74922899 74988633 +CYP1A2 chr15 75041185 75048543 +CSK chr15 75074398 75095539 +ULK3 chr15 75128457 75135687 +SCAMP2 chr15 75136071 75165706 +MPI chr15 75182346 75191798 +FAM219B chr15 75192328 75199462 +COX5A chr15 75212132 75230509 +RPP25 chr15 75246757 75249805 +SCAMP5 chr15 75249560 75313837 +C15orf39 chr15 75487984 75504510 +COMMD4 chr15 75628232 75634268 +NEIL1 chr15 75639296 75647592 +MAN2C1 chr15 75648133 75660971 +SIN3A chr15 75661720 75748183 +PTPN9 chr15 75759462 75871630 +SNUPN chr15 
75890424 75918810 +IMP3 chr15 75931426 75941047 +CSPG4 chr15 75966663 76005189 +UBE2Q2 chr15 76135622 76193419 +FBXO22 chr15 76196200 76227609 +ETFA chr15 76507696 76603813 +SCAPER chr15 76640526 77197785 +RCN2 chr15 77223960 77242601 +TSPAN3 chr15 77336359 77376326 +PEAK1 chr15 77400471 77712486 +HMG20A chr15 77712754 77777949 +LINGO1 chr15 77905369 78113242 +IDH3A chr15 78423840 78464291 +WDR61 chr15 78570177 78592136 +IREB2 chr15 78729773 78793798 +PSMA4 chr15 78832747 78841604 +MORF4L1 chr15 79102829 79190475 +CTSH chr15 79213400 79241916 +RASGRF1 chr15 79252289 79383115 +TMED3 chr15 79603404 79704334 +ST20 chr15 80191182 80216044 +BCL2A1 chr15 80253231 80263788 +ZFAND6 chr15 80351910 80430735 +ARNT2 chr15 80696692 80890278 +MESDC2 chr15 81239667 81282219 +STARD5 chr15 81601394 81616524 +EFTUD1 chr15 82422571 82555104 +RPS17 chr15 82821158 82824972 +UBE2Q2P3 chr15 83023847 83088805 +RPS17L chr15 83205504 83209315 +CPEB1 chr15 83211951 83317612 +WHAMM chr15 83478380 83503611 +FAM103A1 chr15 83654959 83659809 +C15orf40 chr15 83657193 83680393 +BTBD1 chr15 83685174 83736106 +MIR4515 chr15 83736087 83736167 +TM6SF1 chr15 83776159 83813606 +HDGFRP3 chr15 83784320 83876770 +SH3GL3 chr15 84115980 84287495 +UBE2Q2P1 chr15 85070012 85114447 +ZSCAN2 chr15 85144217 85171027 +WDR73 chr15 85185999 85197574 +NMB chr15 85198360 85201794 +SEC11A chr15 85212775 85259947 +ZNF592 chr15 85291866 85349659 +AKAP13 chr15 85923802 86292586 +KLHL25 chr15 86302554 86338261 +NTRK3 chr15 88418230 88799999 +MRPL46 chr15 89002707 89010650 +MRPS11 chr15 89010684 89022222 +DET1 chr15 89054790 89089906 +MIR1179 chr15 89151338 89151428 +AEN chr15 89164527 89175513 +MFGE8 chr15 89441916 89456642 +ABHD2 chr15 89630690 89745591 +POLG chr15 89859534 89878092 +PEX11A chr15 90220995 90234014 +AP3S2 chr15 90373831 90437574 +C15orf38-AP3S2 chr15 90377540 90456114 +C15orf38 chr15 90443159 90456188 +IDH2 chr15 90626277 90645736 +SEMA4B chr15 90703836 90772911 +CIB1 chr15 90773207 90777279 +TTLL13 chr15 
90792762 90808199 +NGRN chr15 90808891 90816463 +CRTC3 chr15 91073157 91188577 +MAN2A2 chr15 91445448 91465814 +UNC45A chr15 91473410 91497323 +HDDC3 chr15 91474148 91475799 +VPS33B chr15 91541646 91565833 +ASB9P1 chr15 93338877 93339669 +CHD2 chr15 93426526 93571237 +RGMA chr15 93586636 93632433 +IGF1R chr15 99192200 99507759 +LRRC28 chr15 99791567 99930934 +MEF2A chr15 100017370 100256671 +ADAMTS17 chr15 100511794 100882210 +LINS chr15 101099574 101143435 +ASB7 chr15 101142739 101191910 +CHSY1 chr15 101715928 101792137 +VIMP chr15 101811022 101817705 +SNRPA1 chr15 101821715 101835487 +PCSK6 chr15 101840818 102065405 +TM2D3 chr15 102161847 102192594 +TARSL2 chr15 102193801 102264807 +WASH3P chr15 102501356 102516768 +DDX11L10 chr16 61553 64093 +POLR3K chr16 96407 103628 +SNRNP25 chr16 103010 107669 +MPG chr16 127006 135852 +NPRL3 chr16 134273 188859 +HBQ1 chr16 230452 231180 +LUC7L chr16 238968 279462 +ITFG3 chr16 284545 319942 +RGS11 chr16 318300 325980 +MRPL28 chr16 417384 420527 +NME4 chr16 446725 460367 +DECR2 chr16 451826 462487 +PIGQ chr16 616995 634136 +RAB40C chr16 639357 679272 +C16orf13 chr16 684429 686358 +FAM195A chr16 691813 698474 +RHOT2 chr16 718086 724174 +STUB1 chr16 730224 732870 +JMJD8 chr16 731671 734529 +WDR24 chr16 734622 740444 +METRN chr16 765115 769655 +FAM173A chr16 770581 772601 +CCDC78 chr16 772582 776954 +HAGHL chr16 776936 785525 +NARFL chr16 779753 791329 +RPUSD1 chr16 834974 838397 +CHTF18 chr16 838046 850737 +LMF1 chr16 903634 1031318 +SOX8 chr16 1031808 1036979 +UBE2I chr16 1355548 1377019 +BAIAP3 chr16 1383602 1399439 +GNPTG chr16 1401924 1413352 +UNKL chr16 1413206 1464752 +C16orf91 chr16 1469745 1479345 +CLCN7 chr16 1494935 1525581 +TELO2 chr16 1543345 1560458 +TMEM204 chr16 1578689 1605581 +CRAMP1L chr16 1662326 1727909 +MAPK8IP3 chr16 1756184 1820318 +NME3 chr16 1820287 1821731 +MRPS34 chr16 1821891 1823156 +SPSB3 chr16 1826713 1843701 +NUBP2 chr16 1832902 1839192 +IGFALS chr16 1840414 1844972 +HAGH chr16 1845621 1877195 
+FAHD1 chr16 1876968 1890208 +MSRB1 chr16 1988211 1993327 +NDUFB10 chr16 2009509 2011976 +RPS2 chr16 2012053 2014861 +SNORA10 chr16 2012335 2012467 +SNHG9 chr16 2014960 2015510 +TBL3 chr16 2022038 2032934 +NTHL1 chr16 2089816 2097867 +TSC2 chr16 2097466 2138716 +PKD1 chr16 2138711 2185899 +TRAF7 chr16 2205699 2228130 +MLST8 chr16 2254249 2259417 +PGP chr16 2261998 2264808 +E4F1 chr16 2273567 2285743 +ECI1 chr16 2289396 2302301 +RNPS1 chr16 2303117 2318413 +ABCA3 chr16 2325882 2390747 +CCNF chr16 2479395 2508855 +TBC1D24 chr16 2525147 2555735 +ATP6V0C chr16 2563871 2570219 +AMDHD2 chr16 2570358 2581423 +CEMP1 chr16 2577081 2581420 +PDPK1 chr16 2587965 2653189 +ERVK13-1 chr16 2710350 2723445 +KCTD5 chr16 2732476 2759031 +SRRM2 chr16 2802330 2822539 +TCEB2 chr16 2821415 2827298 +PRSS22 chr16 2902728 2908171 +FLYWCH2 chr16 2933187 2949383 +FLYWCH1 chr16 2961938 3001209 +PAQR4 chr16 3019246 3023490 +HCFC1R1 chr16 3072621 3074287 +THOC6 chr16 3074028 3077756 +OR1F2P chr16 3265562 3266546 +ZNF200 chr16 3272325 3286221 +ZNF263 chr16 3313743 3351401 +TIGD7 chr16 3348832 3355729 +ZNF75A chr16 3355406 3368852 +MTRNR2L4 chr16 3421053 3422283 +ZSCAN32 chr16 3432085 3451065 +ZNF174 chr16 3451235 3459370 +ZNF597 chr16 3486104 3493542 +NAA60 chr16 3493611 3536963 +CLUAP1 chr16 3550924 3589048 +TRAP1 chr16 3701640 3767598 +CREBBP chr16 3775055 3930727 +SRL chr16 4239375 4292081 +PAM16 chr16 4381550 4405608 +CORO7 chr16 4404543 4475706 +DNAJA3 chr16 4475806 4506776 +NMRAL1 chr16 4511681 4545764 +HMOX2 chr16 4524691 4560348 +CDIP1 chr16 4560676 4588829 +MGRN1 chr16 4666494 4740975 +NUDT16L1 chr16 4743695 4745860 +ANKS3 chr16 4746513 4784379 +ZNF500 chr16 4798240 4817625 +ROGDI chr16 4846969 4852951 +GLYR1 chr16 4853204 4897343 +NAGPA chr16 5074845 5084142 +ALG1 chr16 5083703 5137380 +FAM86A chr16 5134305 5147809 +METTL22 chr16 8715540 8740081 +ABAT chr16 8768422 8878432 +TMEM186 chr16 8874241 8891505 +PMM2 chr16 8882680 8943188 +CARHSP1 chr16 8946799 8962866 +USP7 chr16 8985951 
9058371 +C16orf72 chr16 9185505 9215497 +TEKT5 chr16 10721358 10788802 +NUBP1 chr16 10837643 10863208 +CLEC16A chr16 11038345 11276046 +PRM2 chr16 11369496 11370337 +LITAF chr16 11641853 11730237 +SNN chr16 11762270 11773015 +TXNDC11 chr16 11772936 11836734 +ZC3H7A chr16 11844442 11891123 +RSL1D1 chr16 11929056 11945442 +GSPT1 chr16 11961985 12009939 +TNFRSF17 chr16 12058964 12061925 +SNX29 chr16 12070594 12668146 +SHISA9 chr16 12995477 13334272 +ERCC4 chr16 14014014 14046202 +MKL2 chr16 14165178 14360630 +PARN chr16 14529558 14726585 +BFAR chr16 14726672 14763093 +NOMO1 chr16 14927538 14990017 +PDXDC1 chr16 15068448 15233196 +NTAN1 chr16 15131710 15149921 +RRN3 chr16 15153879 15188174 +MIR3180-4 chr16 15248760 15250877 +MPV17L chr16 15489611 15507125 +C16orf45 chr16 15528152 15718885 +KIAA0430 chr16 15688243 15737023 +FOPNL chr16 15959577 15982482 +NOMO3 chr16 16326352 16388668 +PKD1P1 chr16 16404198 16428047 +XYLT1 chr16 17195626 17564738 +NOMO2 chr16 18511182 18573533 +RPS15A chr16 18792617 18801705 +ARL6IP1 chr16 18802991 18813000 +SMG1 chr16 18816175 18937776 +COQ7 chr16 19078921 19091417 +SYT17 chr16 19179293 19279652 +GDE1 chr16 19513015 19533467 +CCP110 chr16 19535133 19564730 +C16orf62 chr16 19566562 19718115 +IQCK chr16 19727778 19868907 +GPRC5B chr16 19868616 19897489 +ACSM3 chr16 20621565 20808903 +THUMPD1 chr16 20744986 20753406 +ERI2 chr16 20791515 20911671 +LYRM1 chr16 20911190 20936328 +CRYM chr16 21250195 21314404 +SLC7A5P2 chr16 21531151 21531686 +METTL9 chr16 21608539 21668794 +IGSF6 chr16 21652609 21663981 +RRN3P1 chr16 21807951 21831731 +UQCRC2 chr16 21963981 21994981 +EEF2K chr16 22217603 22298554 +POLR3E chr16 22308730 22346424 +SMG1P1 chr16 22448329 22503541 +USP31 chr16 23072727 23160591 +COG7 chr16 23399814 23464501 +GGA2 chr16 23474863 23533316 +EARS2 chr16 23533335 23569052 +UBFD1 chr16 23568392 23585710 +NDUFAB1 chr16 23592323 23607677 +PALB2 chr16 23614488 23652631 +DCTN5 chr16 23652713 23681195 +PLK1 chr16 23688977 23701688 +PRKCB 
chr16 23847322 24231932 +RBBP6 chr16 24549014 24584184 +TNRC6A chr16 24741016 24838953 +SLC5A11 chr16 24857162 24922949 +ARHGAP17 chr16 24930706 25026987 +LCMT1 chr16 25123050 25189552 +AQP8 chr16 25227052 25240261 +NSMCE1 chr16 27236312 27280115 +GTF3C1 chr16 27470876 27561234 +XPO6 chr16 28109300 28223241 +CLN3 chr16 28477974 28503333 +CCDC101 chr16 28565236 28603111 +EIF3C chr16 28699879 28747051 +ATXN2L chr16 28834356 28848558 +TUFM chr16 28853732 28857729 +CD19 chr16 28943260 28950667 +NFATC2IP chr16 28962128 28978418 +SPNS1 chr16 28985542 28995869 +BOLA2 chr16 29464914 29466285 +SLX1B chr16 29465822 29469540 +SULT1A4 chr16 29467127 29476300 +QPRT chr16 29674600 29710020 +KIF22 chr16 29802040 29816706 +MAZ chr16 29817427 29823649 +PRRT2 chr16 29823177 29827201 +PAGR1 chr16 29827285 29841948 +MVP chr16 29831715 29859355 +CDIPT chr16 29869678 29875057 +SEZ6L2 chr16 29882480 29910868 +ASPHD1 chr16 29911696 29931185 +KCTD13 chr16 29916333 29938356 +TMEM219 chr16 29952206 29984373 +TAOK2 chr16 29984962 30003582 +HIRIP3 chr16 30003645 30007757 +DOC2A chr16 30016830 30034591 +ALDOA chr16 30064411 30081778 +PPP4C chr16 30087299 30096698 +YPEL3 chr16 30103635 30108236 +MAPK3 chr16 30125426 30134827 +CORO1A chr16 30194148 30200397 +BOLA2B chr16 30204255 30205627 +SLX1A chr16 30205208 30208882 +SULT1A3 chr16 30205754 30215631 +CD2BP2 chr16 30362087 30366682 +TBC1D10B chr16 30368423 30381585 +ZNF771 chr16 30418618 30440920 +DCTPP1 chr16 30434940 30441396 +SEPHS2 chr16 30454952 30457502 +ZNF747 chr16 30537244 30546173 +ZNF764 chr16 30565085 30569819 +ZNF688 chr16 30580667 30584055 +ZNF785 chr16 30585061 30597092 +ZNF689 chr16 30613879 30635333 +PRR14 chr16 30662038 30667761 +SRCAP chr16 30709530 30755602 +PHKG2 chr16 30759591 30772490 +RNF40 chr16 30773066 30787628 +MIR762 chr16 30905224 30905306 +ORAI3 chr16 30960387 30967782 +STX1B chr16 31000577 31021949 +STX4 chr16 31044210 31054296 +ZNF668 chr16 31072164 31085641 +ZNF646 chr16 31085743 31095517 +VKORC1 chr16 31102163 
31107301 +BCKDK chr16 31117428 31124110 +KAT8 chr16 31127075 31142714 +FUS chr16 31191431 31203127 +PYCARD chr16 31212806 31214771 +PYDC1 chr16 31227283 31228680 +ITGAM chr16 31271311 31344213 +ITGAX chr16 31366455 31394318 +ARMC5 chr16 31469401 31478487 +TGFB1I1 chr16 31482906 31489281 +C16orf58 chr16 31500792 31520630 +AHSP chr16 31539185 31540124 +CLUHP3 chr16 31711911 31721097 +ZNF267 chr16 31885079 31928668 +HERC2P4 chr16 32114566 32199434 +LINC00273 chr16 33961052 33962503 +VPS35 chr16 46690054 46723430 +ORC6 chr16 46723555 46732306 +C16orf87 chr16 46830519 46865323 +GPT2 chr16 46918290 46965209 +DNAJA2 chr16 46989299 47007699 +NETO2 chr16 47111614 47177908 +ITFG1 chr16 47188298 47498060 +PHKB chr16 47495034 47735434 +LONP2 chr16 48278207 48397033 +SIAH1 chr16 48390275 48482313 +ZNF423 chr16 49521435 49891830 +CNEP1R1 chr16 50058321 50070999 +ADCY7 chr16 50280048 50352046 +BRD7 chr16 50347398 50402845 +NKD1 chr16 50582241 50670647 +CYLD chr16 50775961 50835846 +SALL1 chr16 51169886 51185278 +TOX3 chr16 52471917 52581714 +CHD9 chr16 53088945 53363062 +RBL2 chr16 53467889 53525561 +AKTIP chr16 53524952 53538323 +FTO chr16 53737875 54155853 +IRX5 chr16 54964774 54968397 +MMP2 chr16 55423612 55540603 +LPCAT2 chr16 55542910 55620582 +GNAO1 chr16 56225302 56391356 +AMFR chr16 56395364 56459450 +NUDT21 chr16 56463045 56486111 +OGFOD1 chr16 56485402 56513012 +BBS2 chr16 56500748 56554195 +MT3 chr16 56622986 56625000 +MT2A chr16 56642111 56643409 +MT1E chr16 56659387 56661024 +MT1X chr16 56716336 56718108 +NUP93 chr16 56764017 56878797 +HERPUD1 chr16 56965960 56977798 +CPNE2 chr16 57126449 57181878 +FAM192A chr16 57186378 57220028 +RSPRY1 chr16 57220049 57274387 +ARL2BP chr16 57279010 57287516 +PLLP chr16 57290004 57318599 +CX3CL1 chr16 57406370 57418960 +CIAPIN1 chr16 57462081 57481440 +COQ9 chr16 57481337 57495187 +POLR2C chr16 57496299 57505922 +GPR56 chr16 57644564 57698944 +KATNB1 chr16 57769642 57791162 +KIFC3 chr16 57792129 57896957 +USB1 chr16 58033450 
58055522 +C16orf80 chr16 58147496 58163354 +CCDC113 chr16 58265061 58317740 +NDRG4 chr16 58496750 58547532 +SETD6 chr16 58549383 58554431 +CNOT1 chr16 58553855 58663790 +SLC38A7 chr16 58699013 58719008 +GOT2 chr16 58741035 58768261 +CDH11 chr16 64977656 65160015 +TK2 chr16 66541906 66586447 +CKLF chr16 66586466 66600154 +CMTM3 chr16 66637777 66647795 +DYNC1LI2 chr16 66754796 66785701 +NAE1 chr16 66836778 66907159 +FAM96B chr16 66965959 66968326 +CES2 chr16 66968347 66978999 +CES3 chr16 66995140 67009051 +CBFB chr16 67063019 67134961 +C16orf70 chr16 67143861 67182442 +KIAA0895L chr16 67209505 67217943 +E2F4 chr16 67226072 67232821 +LRRC29 chr16 67241042 67260951 +TMEM208 chr16 67261006 67263181 +KCTD19 chr16 67323331 67360666 +TPPP3 chr16 67423712 67427438 +ATP6V0D1 chr16 67471917 67515140 +FAM65A chr16 67552321 67580691 +CTCF chr16 67596310 67673086 +ACD chr16 67691415 67694713 +GFOD2 chr16 67708434 67753324 +CENPT chr16 67862060 67881714 +THAP11 chr16 67876213 67878097 +NUTF2 chr16 67880635 67906470 +EDC4 chr16 67906926 67918406 +PSKH1 chr16 67927175 67963581 +PSMB10 chr16 67968405 67970990 +LCAT chr16 67973653 67978034 +SLC12A4 chr16 67977377 68003504 +DDX28 chr16 68055179 68057770 +NFATC3 chr16 68118654 68263162 +PLA2G15 chr16 68279207 68294961 +SLC7A6 chr16 68298433 68335722 +SLC7A6OS chr16 68318406 68344849 +PRMT7 chr16 68344877 68392466 +SMPD3 chr16 68392231 68482591 +ZFP90 chr16 68563993 68609975 +TANGO6 chr16 68877507 69119083 +CHTF8 chr16 69151913 69166487 +CIRH1A chr16 69165194 69265033 +VPS4A chr16 69345259 69358949 +COG8 chr16 69354043 69373570 +NIP7 chr16 69373333 69377014 +TERF2 chr16 69389464 69442474 +CYB5B chr16 69458428 69500169 +NFAT5 chr16 69598997 69738569 +NOB1 chr16 69775770 69788843 +WWP2 chr16 69796209 69975644 +PDXDC2P chr16 70044654 70099848 +MIR1972-2 chr16 70064249 70064325 +PDPR chr16 70147529 70195203 +EXOSC6 chr16 70284134 70285833 +AARS chr16 70286198 70323446 +DDX19B chr16 70323566 70369186 +DDX19A chr16 70380732 70407286 +COG4 
chr16 70514471 70557468 +SF3B3 chr16 70557691 70608820 +MTSS1L chr16 70695107 70719969 +VAC14 chr16 70721342 70835064 +HYDIN chr16 70841281 71264625 +ZNF23 chr16 71481500 71496998 +ZNF19 chr16 71498453 71598992 +CHST4 chr16 71559136 71572649 +AP1G1 chr16 71762913 71843104 +ATXN1L chr16 71879894 71919171 +IST1 chr16 71879899 71962913 +ZNF821 chr16 71893583 71929239 +PKD1L3 chr16 71963441 72033877 +DHODH chr16 72042487 72058954 +TXNL4B chr16 72078188 72128330 +DHX38 chr16 72127461 72146811 +PSMD7 chr16 74330673 74340186 +GLG1 chr16 74485856 74641012 +FA2H chr16 74746853 74808729 +WDR59 chr16 74907468 75034071 +LDHD chr16 75145758 75150669 +ZFP1 chr16 75182390 75206134 +BCAR1 chr16 75262928 75301951 +CFDP1 chr16 75327596 75467383 +TMEM170A chr16 75476952 75499395 +TMEM231 chr16 75572015 75590184 +GABARAPL2 chr16 75600249 75611779 +ADAT1 chr16 75630879 75657198 +KARS chr16 75661622 75682541 +TERF2IP chr16 75681684 75795770 +MON1B chr16 77224732 77236302 +NUDT7 chr16 77756411 77776157 +WWOX chr16 78133310 79246564 +CMC2 chr16 81009698 81053875 +ATMIN chr16 81069452 81080963 +GCSH chr16 81115566 81130008 +BCMO1 chr16 81272053 81324747 +MPHOSPH6 chr16 82181403 82203831 +CDH13 chr16 82660408 83830204 +HSBP1 chr16 83841448 83853342 +MBTPS1 chr16 84087368 84150511 +HSDL1 chr16 84155886 84178797 +TAF1C chr16 84211458 84220669 +KCNG4 chr16 84255823 84273356 +COTL1 chr16 84599200 84651683 +USP10 chr16 84733584 84813528 +CRISPLD2 chr16 84853590 84954374 +ZDHHC7 chr16 85007787 85045141 +LINC00311 chr16 85316564 85319569 +EMC8 chr16 85805364 85833214 +COX4I1 chr16 85832239 85840650 +IRF8 chr16 85932409 85956215 +MTHFSD chr16 86563782 86588841 +MAP1LC3B chr16 87417601 87438385 +KLHDC4 chr16 87730091 87799598 +SLC7A5 chr16 87863629 87903094 +CA5A chr16 87921625 87970135 +BANP chr16 87982850 88110924 +ZC3H18 chr16 88636789 88698374 +CYBA chr16 88709691 88717560 +MVD chr16 88718343 88729569 +SNAI3 chr16 88744090 88752901 +RNF166 chr16 88762903 88772829 +CTU2 chr16 88772871 88781794 
+APRT chr16 88875747 88878352 +GALNS chr16 88880142 88923378 +TRAPPC2L chr16 88922628 88929094 +CBFA2T3 chr16 88941266 89043612 +ACSF3 chr16 89154783 89222254 +ANKRD11 chr16 89334038 89556969 +SPG7 chr16 89557325 89624176 +RPL13 chr16 89627065 89630950 +CPNE7 chr16 89642176 89663654 +CHMP1A chr16 89710839 89724253 +CDK10 chr16 89747145 89762772 +SPATA2L chr16 89762751 89768113 +ZNF276 chr16 89786808 89807311 +TCF25 chr16 89940000 89977792 +MC1R chr16 89978527 89987385 +TUBB3 chr16 89985573 90002500 +DEF8 chr16 90014333 90034468 +CENPBD1 chr16 90036206 90038942 +AFG3L1P chr16 90038994 90068569 +RPH3AL chr17 62293 236045 +VPS53 chr17 411908 624957 +FAM57A chr17 635652 646206 +GEMIN4 chr17 647654 657239 +GLOD4 chr17 660337 685581 +RNMTL1 chr17 685513 695749 +NXN chr17 702553 883010 +TIMM22 chr17 900357 906911 +ABR chr17 906758 1132315 +YWHAE chr17 1247566 1303672 +CRK chr17 1323983 1366456 +INPP5K chr17 1397865 1420182 +PITPNA-AS1 chr17 1420225 1421390 +PITPNA chr17 1421012 1466110 +SLC43A2 chr17 1472561 1532180 +PRPF8 chr17 1553923 1588176 +MIR22HG chr17 1614805 1620468 +WDR81 chr17 1619817 1641893 +SERPINF1 chr17 1665253 1680868 +SMYD4 chr17 1682779 1733928 +RPA1 chr17 1732996 1803376 +DPH1 chr17 1933404 1946724 +OVCA2 chr17 1945327 1946724 +SMG6 chr17 1963133 2207065 +SRR chr17 2206677 2228554 +TSR1 chr17 2225797 2240801 +SGSM2 chr17 2240792 2284352 +METTL16 chr17 2308856 2415185 +PAFAH1B1 chr17 2496504 2588909 +OR1D5 chr17 2965963 2966901 +OR3A3 chr17 3323862 3324827 +ASPA chr17 3375668 3406713 +TRPV1 chr17 3468738 3500392 +SHPK chr17 3511556 3539616 +CTNS chr17 3539762 3564836 +TAX1BP3 chr17 3566196 3571976 +EMC6 chr17 3572109 3572962 +ITGAE chr17 3617922 3704537 +C17orf85 chr17 3714460 3749545 +ZZEF1 chr17 3907739 4046314 +CYB5D2 chr17 4046462 4090605 +ANKFY1 chr17 4067201 4167274 +UBE2G1 chr17 4172554 4269923 +MYBBP1A chr17 4442192 4458926 +PELP1 chr17 4574679 4607632 +ARRB2 chr17 4613784 4624794 +MED11 chr17 4634723 4636905 +CXCL16 chr17 4636821 4643217 +PSMB6 
chr17 4699439 4701790 +PLD2 chr17 4710391 4726729 +C17orf107 chr17 4802713 4806227 +SLC25A11 chr17 4840425 4843546 +RNF167 chr17 4843303 4848517 +PFN1 chr17 4848947 4852356 +SPAG7 chr17 4862521 4871167 +CAMTA2 chr17 4871287 4890960 +SLC52A1 chr17 4935895 4955304 +ZFP3 chr17 4981543 4999669 +ZNF232 chr17 5008836 5026411 +ZNF594 chr17 5082831 5095178 +RABEP1 chr17 5185558 5289129 +NUP88 chr17 5264258 5323480 +RPAIN chr17 5322961 5336196 +C1QBP chr17 5336097 5352150 +DHX33 chr17 5344232 5372380 +DERL2 chr17 5374571 5390131 +MIS12 chr17 5389605 5394134 +WSCD1 chr17 5675554 6027747 +TXNDC17 chr17 6544078 6547861 +MED31 chr17 6546635 6554954 +C17orf100 chr17 6554971 6556791 +XAF1 chr17 6658766 6678966 +RNASEK chr17 6915736 6917851 +RNASEK-C17orf49 chr17 6915954 6920839 +C17orf49 chr17 6917814 6920844 +ACADVL chr17 7120444 7128592 +DVL2 chr17 7128660 7137864 +PHF23 chr17 7138347 7143041 +GABARAP chr17 7143333 7146089 +CTDNEP1 chr17 7146910 7155810 +EIF5A chr17 7210318 7215774 +GPS2 chr17 7214643 7218883 +TMEM256 chr17 7306294 7307456 +NLGN2 chr17 7308193 7323179 +C17orf74 chr17 7328934 7330887 +CHRNB1 chr17 7348380 7361026 +ZBTB4 chr17 7362685 7387582 +POLR2A chr17 7387685 7417933 +SENP3 chr17 7465192 7475287 +EIF4A1 chr17 7476024 7482323 +SNORD10 chr17 7480129 7480270 +CD68 chr17 7482785 7485429 +MPDU1 chr17 7486847 7496107 +SOX15 chr17 7491496 7493488 +FXR2 chr17 7494548 7518189 +SAT2 chr17 7529552 7531194 +ATP1B2 chr17 7549945 7561086 +TP53 chr17 7565097 7590856 +WRAP53 chr17 7589389 7606820 +KDM6B chr17 7743222 7758114 +LSMD1 chr17 7760003 7788556 +CYB5D1 chr17 7761064 7765600 +CHD3 chr17 7788124 7816078 +TRAPPC1 chr17 7833663 7835441 +CNTROB chr17 7835419 7853236 +PER1 chr17 8043790 8059824 +VAMP2 chr17 8062467 8066864 +TMEM107 chr17 8076555 8079717 +C17orf59 chr17 8091652 8093564 +CTC1 chr17 8130191 8151362 +PFAS chr17 8150936 8173809 +RANGRF chr17 8191815 8193410 +KRBA2 chr17 8271955 8280029 +RPL26 chr17 8280838 8286531 +NDEL1 chr17 8316449 8393729 +MYH10 chr17 
8377523 8534079 +SPDYE4 chr17 8656424 8661877 +STX8 chr17 9153788 9479908 +RCVRN chr17 9799637 9808938 +SCO1 chr17 10583654 10601692 +ADPRM chr17 10600911 10614550 +TMEM220 chr17 10602332 10633633 +ZNF18 chr17 11880756 11900827 +MAP2K4 chr17 11924141 12047147 +MYOCD chr17 12569207 12672266 +ELAC2 chr17 12895708 12921504 +COX10-AS1 chr17 13659795 13972812 +COX10 chr17 13972813 14111994 +CDRT7 chr17 14934292 14935274 +PMP22 chr17 15133095 15168643 +TRIM16 chr17 15531274 15587625 +ZNF286A chr17 15602891 15640874 +TBC1D26 chr17 15635561 15649476 +ZSWIM7 chr17 15879874 15903031 +TTC19 chr17 15902694 15948329 +NCOR1 chr17 15932471 16121499 +PIGL chr17 16120505 16252115 +UBB chr17 16284112 16286059 +C17orf76-AS1 chr17 16342136 16381992 +ZNF287 chr17 16454701 16472520 +ZNF624 chr17 16524051 16557170 +CCDC144A chr17 16592851 16707767 +USP32P1 chr17 16689803 16707769 +TNFRSF13B chr17 16832849 16875432 +MPRIP chr17 16945859 17120993 +FLCN chr17 17115526 17140502 +COPS3 chr17 17150141 17184607 +MED9 chr17 17380300 17396540 +RASD1 chr17 17397751 17399709 +PEMT chr17 17408877 17495022 +RAI1 chr17 17584787 17714767 +SMCR5 chr17 17680000 17682843 +SREBF1 chr17 17713713 17740325 +TOM1L2 chr17 17746828 17875736 +ATPAF2 chr17 17880723 17942523 +DRG2 chr17 17991200 18011285 +ALKBH5 chr17 18086392 18113268 +LLGL1 chr17 18128901 18148189 +FLII chr17 18148150 18162230 +TOP3A chr17 18174742 18218321 +SMCR8 chr17 18218624 18226517 +SHMT1 chr17 18231187 18266856 +CCDC144B chr17 18441117 18528930 +TRIM16L chr17 18601311 18639431 +TVP23B chr17 18684308 18710027 +PRPSAP2 chr17 18743398 18834581 +GRAP chr17 18923986 18950950 +EPN2 chr17 19118928 19240028 +EPN2-AS1 chr17 19199909 19209574 +MAPK7 chr17 19281034 19286857 +RNF112 chr17 19314438 19320589 +SNORA59B chr17 19460524 19461224 +ALDH3A2 chr17 19551449 19580911 +AKAP10 chr17 19807615 19881656 +SPECC1 chr17 19912657 20222339 +CCDC144NL chr17 20739760 20799453 +USP22 chr17 20902910 20947073 +DHRS7B chr17 21026677 21096578 +TMEM11 chr17 
21100593 21117937 +MAP2K3 chr17 21187984 21218552 +C17orf51 chr17 21428051 21477722 +MTRNR2L1 chr17 22022437 22023991 +MIR4522 chr17 25620936 25621022 +WSB1 chr17 25621102 25640657 +LGALS9 chr17 25956824 25976586 +LYRM9 chr17 26205340 26221778 +NLK chr17 26368763 26523407 +KRT18P55 chr17 26603012 26644809 +TMEM97 chr17 26646121 26655707 +IFT20 chr17 26655352 26662515 +TNFAIP1 chr17 26662628 26674035 +POLDIP2 chr17 26673659 26684545 +TMEM199 chr17 26684604 26708716 +SARM1 chr17 26691378 26728065 +SLC46A1 chr17 26721661 26734215 +SLC13A2 chr17 26800311 26824799 +UNC119 chr17 26873725 26879686 +PIGS chr17 26880401 26898890 +ALDOC chr17 26900133 26904282 +SGK494 chr17 26934982 26941218 +KIAA0100 chr17 26941458 26972472 +SDF2 chr17 26975374 26989207 +SUPT6H chr17 26989109 27029697 +RPL23A chr17 27046411 27051377 +TRAF4 chr17 27071002 27077974 +FAM222B chr17 27082996 27182250 +ERAL1 chr17 27181956 27188085 +FLOT2 chr17 27206353 27224697 +DHRS13 chr17 27224799 27230089 +PHF12 chr17 27232268 27278789 +SEZ6 chr17 27281919 27333458 +MYO18A chr17 27400528 27507430 +TIAF1 chr17 27400537 27418537 +NUFIP2 chr17 27582854 27621136 +TAOK1 chr17 27717482 27878922 +TP53I13 chr17 27893070 27900175 +GIT1 chr17 27900487 27921072 +CORO6 chr17 27941774 27949925 +SSH2 chr17 27952956 28257294 +NSRP1 chr17 28442539 28513493 +BLMH chr17 28575218 28619074 +CPD chr17 28705923 28797007 +GOSR1 chr17 28804380 28854610 +LRRC37BP1 chr17 28956698 28964482 +CRLF3 chr17 29096406 29151794 +TEFM chr17 29224354 29233838 +ADAP2 chr17 29233362 29286340 +MIR4733 chr17 29421368 29421443 +NF1 chr17 29421945 29709134 +OMG chr17 29599031 29624557 +EVI2B chr17 29630784 29641130 +EVI2A chr17 29644578 29648902 +COPRS chr17 30178883 30186356 +UTP6 chr17 30187923 30228784 +SUZ12 chr17 30264037 30328064 +LRRC37B chr17 30334891 30380523 +RHOT1 chr17 30469473 30580393 +C17orf75 chr17 30651584 30677042 +ZNF207 chr17 30677136 30708905 +PSMD11 chr17 30771279 30810336 +TMEM98 chr17 31254928 31272124 +CCL2 chr17 32582304 
32584222 +CCL8 chr17 32646055 32648421 +ZNF830 chr17 33288549 33290205 +LIG3 chr17 33307513 33332083 +RFFL chr17 33333009 33416338 +RAD51D chr17 33426811 33448541 +NLE1 chr17 33455772 33469334 +SLFN14 chr17 33875144 33885117 +AP2B1 chr17 33905065 34053436 +CCL5 chr17 34198495 34207797 +CCL3 chr17 34415602 34417515 +CCL4 chr17 34430983 34433014 +CCL3L3 chr17 34522268 34524157 +CCL4L1 chr17 34538310 34540280 +CCL3L1 chr17 34623842 34625731 +CCL4L2 chr17 34639793 34641846 +ZNHIT3 chr17 34842473 34855154 +MYO19 chr17 34851477 34899284 +PIGW chr17 34890847 34895159 +GGNBP2 chr17 34900737 34946278 +AATF chr17 35306175 35414171 +ACACA chr17 35441923 35766909 +TADA2A chr17 35766965 35839835 +DUSP14 chr17 35849937 35873603 +SYNRG chr17 35874900 35969544 +DDX52 chr17 35969787 36003493 +TBC1D3F chr17 36283971 36294915 +MRPL45 chr17 36452989 36479101 +ARHGAP23 chr17 36584662 36668628 +MLLT6 chr17 36861795 36886056 +CISD3 chr17 36886488 36891297 +PSMB3 chr17 36908989 36920484 +PIP4K2B chr17 36921942 36956379 +CWC25 chr17 36956687 36981734 +RPL23 chr17 37004118 37010096 +LASP1 chr17 37026112 37078023 +LINC00672 chr17 37081421 37084310 +ARL5C chr17 37313147 37323737 +CACNB1 chr17 37329709 37353956 +RPL19 chr17 37356536 37360980 +FBXL20 chr17 37415384 37558776 +MED1 chr17 37560538 37607539 +CDK12 chr17 37617764 37721160 +STARD3 chr17 37793318 37819737 +TCAP chr17 37820440 37822808 +PGAP3 chr17 37827375 37853050 +MIEN1 chr17 37884749 37887040 +ORMDL3 chr17 38077294 38083854 +PSMD3 chr17 38137050 38154213 +MED24 chr17 38175350 38217468 +THRA chr17 38214543 38250120 +NR1D1 chr17 38249040 38256978 +MSL1 chr17 38278551 38293042 +CASC3 chr17 38296571 38328436 +WIPF2 chr17 38375556 38440388 +SMARCE1 chr17 38781214 38804760 +KRT10 chr17 38974369 38978847 +KRT20 chr17 39032193 39041479 +KRTAP1-1 chr17 39196811 39197713 +KRTAP4-6 chr17 39295685 39296739 +KRTAP9-4 chr17 39405939 39406904 +KRT38 chr17 39592698 39597173 +KRT17 chr17 39775689 39781094 +EIF1 chr17 39845137 39848920 +LEPREL4 
chr17 39958199 39968856 +NT5C3B chr17 39981335 39992523 +KLHL11 chr17 40009797 40021684 +ACLY chr17 40023161 40086795 +CNP chr17 40118759 40129749 +DNAJC7 chr17 40128451 40173394 +NKIRAS2 chr17 40163400 40177659 +KAT2A chr17 40265126 40273376 +RAB5C chr17 40276994 40307035 +STAT5B chr17 40351186 40428725 +STAT3 chr17 40465342 40540586 +ATP6V0A1 chr17 40610862 40674629 +NAGLU chr17 40688190 40696467 +COASY chr17 40713485 40718295 +MLX chr17 40719086 40725257 +PSMC3IP chr17 40724333 40729849 +FAM134C chr17 40731531 40762641 +TUBG1 chr17 40761694 40767252 +TUBG2 chr17 40811323 40819024 +CNTNAP1 chr17 40834631 40851832 +EZH1 chr17 40852293 40897071 +RAMP2-AS1 chr17 40905932 40913275 +VPS25 chr17 40925454 40931617 +COA3 chr17 40947165 40950722 +BECN1 chr17 40962152 40985367 +PSME3 chr17 40976402 40995774 +AOC2 chr17 40996617 41002724 +AARSD1 chr17 41102543 41116515 +RUNDC1 chr17 41132582 41145707 +RPL27 chr17 41150290 41154976 +IFI35 chr17 41158742 41166473 +VAT1 chr17 41166622 41177140 +RND2 chr17 41177258 41184057 +NBR2 chr17 41277627 41305688 +NBR1 chr17 41322498 41363708 +ARL4D chr17 41476327 41478505 +DHX8 chr17 41561233 41621831 +ETV4 chr17 41605212 41656988 +DUSP3 chr17 41843489 41856356 +MPP2 chr17 41952725 41987068 +FAM215A chr17 41994576 41995326 +PYY chr17 42030106 42081837 +TMEM101 chr17 42088556 42101314 +LSM12 chr17 42112003 42144987 +G6PC3 chr17 42148103 42153709 +HDAC5 chr17 42154114 42201070 +ASB16-AS1 chr17 42253341 42264085 +TMUB2 chr17 42264338 42269099 +ATXN7L3 chr17 42269173 42277481 +UBTF chr17 42282401 42298994 +RUNDC3A chr17 42385781 42396039 +SLC25A39 chr17 42396993 42402238 +GRN chr17 42422614 42430470 +GPATCH8 chr17 42472652 42580798 +C17orf104 chr17 42733762 42767676 +CCDC43 chr17 42750437 42767147 +EFTUD2 chr17 42927311 42977030 +GFAP chr17 42982376 42994305 +KIF18B chr17 43002077 43025082 +C1QL1 chr17 43037061 43045439 +DCAKD chr17 43100708 43138473 +NMT1 chr17 43128978 43186384 +ACBD4 chr17 43209967 43221548 +HEXIM1 chr17 43224684 
43229468 +HEXIM2 chr17 43238067 43247407 +MAP3K14 chr17 43340488 43394414 +ARHGAP27 chr17 43471275 43511787 +PLEKHM1 chr17 43513266 43568115 +LRRC37A4P chr17 43578685 43627701 +CRHR1-IT1 chr17 43697694 43725582 +MAPT chr17 43971748 44105700 +KANSL1 chr17 44107282 44302733 +KANSL1-AS1 chr17 44270942 44274089 +ARL17B chr17 44352150 44439130 +NSFP1 chr17 44450221 44564507 +LRRC37A2 chr17 44588877 44633016 +ARL17A chr17 44594068 44657088 +NSF chr17 44668035 44834830 +GOSR2 chr17 45000483 45105003 +CDC27 chr17 45195069 45266788 +MRPL45P2 chr17 45527934 45569858 +NPEPPS chr17 45600308 45700642 +KPNB1 chr17 45726842 45762871 +TBKBP1 chr17 45771447 45789416 +OSBPL7 chr17 45884738 45899200 +MRPL10 chr17 45900638 45908900 +SCRN2 chr17 45915058 45918699 +PNPO chr17 46018872 46025654 +CDK5RAP3 chr17 46045176 46059140 +NFE2L1 chr17 46125691 46138849 +CBX1 chr17 46147414 46178883 +SNX11 chr17 46180719 46200436 +HOXB3 chr17 46626232 46682274 +MIR3185 chr17 46801770 46801837 +CALCOCO2 chr17 46908350 46943884 +ATP5G1 chr17 46970127 46973233 +UBE2Z chr17 46985731 47006418 +SNF8 chr17 47006678 47022479 +GNGT2 chr17 47280153 47287936 +ZNF652 chr17 47366568 47439835 +PHB chr17 47481414 47492246 +SPOP chr17 47676246 47755596 +SLC35B1 chr17 47778305 47786376 +KAT7 chr17 47865917 47906458 +PDK2 chr17 48172101 48189516 +COL1A1 chr17 48260650 48278993 +XYLT2 chr17 48423453 48440499 +MRPL27 chr17 48445218 48450575 +LRRC59 chr17 48452420 48474914 +RSAD1 chr17 48556161 48563336 +EPN3 chr17 48609904 48621111 +SPATA20 chr17 48620419 48633213 +ANKRD40 chr17 48770551 48785285 +LUC7L3 chr17 48796905 48833574 +TOB1 chr17 48939584 48945339 +TOB1-AS1 chr17 48944040 48987098 +SPAG9 chr17 49039535 49198226 +NME1 chr17 49230897 49239789 +NME2 chr17 49230951 49249105 +UTP18 chr17 49337889 49375297 +CA10 chr17 49707674 50237377 +TOM1L1 chr17 52976748 53039310 +COX11 chr17 53029263 53046146 +MMD chr17 53469974 53499353 +TMEM100 chr17 53796988 53809482 +PCTP chr17 53828340 53920191 +DGKE chr17 54911460 
54946036 +TRIM25 chr17 54965270 54991399 +COIL chr17 55015548 55038415 +SCPEP1 chr17 55055466 55084129 +AKAP1 chr17 55162453 55198710 +MSI2 chr17 55333212 55762046 +MRPS23 chr17 55916842 55927417 +VEZF1 chr17 56048910 56065620 +SRSF1 chr17 56080721 56084707 +MKS1 chr17 56282803 56296966 +BZRAP1 chr17 56378592 56406152 +SUPT4H1 chr17 56422539 56430454 +MTMR4 chr17 56566898 56595266 +SEPT4 chr17 56597611 56618179 +TEX14 chr17 56634039 56769416 +RAD51C chr17 56769934 56811703 +TRIM37 chr17 57059999 57184282 +SKA2 chr17 57187312 57232630 +PRR11 chr17 57232860 57282066 +SMG8 chr17 57286761 57292608 +GDPD1 chr17 57297828 57353328 +YPEL2 chr17 57409050 57479090 +DHX40 chr17 57642886 57685706 +CLTC chr17 57697219 57773671 +PTRH2 chr17 57751997 57784987 +VMP1 chr17 57784553 57919616 +TUBD1 chr17 57936851 57970304 +RPS6KB1 chr17 57970447 58027925 +RNFT1 chr17 58029601 58042122 +TBC1D3P1-DHX40P1 chr17 58039724 58096413 +HEATR6 chr17 58120555 58156292 +USP32 chr17 58256455 58499831 +APPBP2 chr17 58520520 58603580 +PPM1D chr17 58677544 58741849 +BCAS3 chr17 58754814 59470199 +C17orf82 chr17 59489112 59490641 +INTS2 chr17 59942731 60005377 +MED13 chr17 60019966 60142643 +METTL2A chr17 60501228 60527454 +MRC2 chr17 60704762 60770958 +TANC2 chr17 61086917 61505060 +DCAF7 chr17 61627822 61671639 +TACO1 chr17 61678231 61685725 +STRADA chr17 61780192 61819330 +CCDC47 chr17 61822610 61853711 +DDX42 chr17 61850963 61896677 +FTSJ3 chr17 61896793 61907372 +PSMC5 chr17 61904512 61909379 +CSHL1 chr17 61986957 61996198 +TEX2 chr17 62224587 62340661 +MILR1 chr17 62461569 62464760 +POLG2 chr17 62473902 62493154 +DDX5 chr17 62495734 62504317 +MIR3064 chr17 62496891 62496957 +MIR5047 chr17 62497332 62497431 +CEP95 chr17 62502706 62538579 +SMURF2 chr17 62538413 62658186 +PLEKHM1P chr17 62775377 62833272 +LRRC37A3 chr17 62850430 62915598 +AMZ2P1 chr17 62962668 62971694 +GNA13 chr17 63006833 63052957 +RGS9 chr17 63133549 63223821 +PRKCA chr17 64298754 64806861 +CACNG4 chr17 64961026 65029514 +HELZ 
chr17 65066554 65242105 +PSMD12 chr17 65334032 65362743 +NOL11 chr17 65713949 65740647 +BPTF chr17 65821640 65980494 +C17orf58 chr17 65987217 65989765 +KPNA2 chr17 66031635 66042958 +LINC00674 chr17 66098049 66111659 +AMZ2 chr17 66243715 66253297 +PRKAR1A chr17 66507921 66547460 +ABCA8 chr17 66863433 66951533 +MIR4524B chr17 67095683 67095797 +KCNJ16 chr17 68049570 68131749 +SOX9 chr17 70117161 70122561 +SLC39A11 chr17 70642088 71088851 +SSTR2 chr17 71161151 71167185 +COG1 chr17 71189129 71204646 +FAM104A chr17 71203492 71232892 +C17orf80 chr17 71228372 71245091 +CDC42EP4 chr17 71279763 71308314 +RPL38 chr17 72199721 72206794 +TTYH2 chr17 72209653 72258155 +DNAI2 chr17 72270386 72311023 +BTBD17 chr17 72352555 72358085 +CD300A chr17 72462555 72480933 +CD300E chr17 72606026 72619897 +SLC9A3R1 chr17 72744791 72765492 +NAT9 chr17 72766686 72772506 +TMEM104 chr17 72772622 72835918 +FDXR chr17 72858619 72869156 +USH1G chr17 72912176 72919351 +HID1 chr17 72946838 72969261 +CDR2L chr17 72983727 73001895 +ICT1 chr17 73008765 73017356 +ATP5H chr17 73034958 73043080 +ARMC7 chr17 73106047 73126360 +NT5C chr17 73126320 73127890 +HN1 chr17 73131343 73164376 +SUMO2 chr17 73163408 73179078 +NUP85 chr17 73201754 73231853 +GGA3 chr17 73232694 73258444 +MRPS7 chr17 73257755 73262454 +MIF4GD chr17 73262309 73267308 +SLC25A19 chr17 73269073 73285591 +GRB2 chr17 73314157 73401790 +KIAA0195 chr17 73437240 73496171 +CASKIN2 chr17 73496342 73511664 +TSEN54 chr17 73512141 73520820 +RECQL5 chr17 73622925 73663269 +SAP30BP chr17 73663196 73704142 +GALK1 chr17 73747675 73761792 +H3F3B chr17 73772515 73781974 +UNK chr17 73780681 73821886 +WBP2 chr17 73841780 73852588 +MRPL38 chr17 73894724 73905899 +ACOX1 chr17 73937588 73975515 +TEN1 chr17 73975301 73996667 +CDK3 chr17 73996987 74002080 +SRP68 chr17 74035184 74068734 +EXOC7 chr17 74077087 74117657 +RNF157-AS1 chr17 74136637 74150731 +RNF157 chr17 74138534 74236454 +PRPSAP1 chr17 74305567 74380602 +UBE2O chr17 74385532 74449288 +RHBDF2 chr17 
74466973 74497872 +PRCD chr17 74523668 74549660 +SNHG16 chr17 74553848 74561430 +MXRA7 chr17 74668633 74707098 +JMJD6 chr17 74708919 74722866 +METTL23 chr17 74722912 74730018 +SRSF2 chr17 74730197 74733456 +MFSD11 chr17 74731947 74777531 +MIR636 chr17 74732532 74732630 +SEC14L1 chr17 75082798 75213179 +LINC00338 chr17 75082798 75091068 +TNRC6C chr17 76000249 76104916 +TMC6 chr17 76106539 76128488 +C17orf99 chr17 76142434 76162258 +SYNGR2 chr17 76164639 76169608 +AFMID chr17 76183398 76203782 +PGS1 chr17 76374721 76421195 +CYTH1 chr17 76670130 76778379 +USP36 chr17 76783463 76837523 +TIMP2 chr17 76849059 76921469 +LGALS3BP chr17 76967320 76976191 +CBX8 chr17 77765931 77775482 +TBC1D16 chr17 77906142 78009647 +GAA chr17 78075355 78093678 +EIF4A3 chr17 78109013 78120982 +SGSH chr17 78180515 78194722 +SLC26A11 chr17 78193498 78227299 +RNF213 chr17 78234665 78372586 +ENDOV chr17 78388965 78411886 +CHMP6 chr17 78965398 78983317 +BAIAP2 chr17 79008948 79091232 +AATK chr17 79091095 79139877 +MIR1250 chr17 79106996 79107108 +AZI1 chr17 79163393 79196799 +C17orf89 chr17 79213039 79215081 +SLC38A10 chr17 79218800 79269347 +ACTG1 chr17 79476997 79490873 +C17orf70 chr17 79506911 79520987 +NPLOC4 chr17 79523913 79615495 +OXLD1 chr17 79632066 79633665 +CCDC137 chr17 79633394 79640934 +ARL16 chr17 79648204 79650954 +HGS chr17 79650356 79670168 +MRPL12 chr17 79670387 79674556 +FAM195B chr17 79780287 79791178 +P4HB chr17 79801035 79818570 +ARHGDIA chr17 79825597 79829282 +ANAPC11 chr17 79848666 79858867 +PCYT2 chr17 79858841 79869340 +SIRT7 chr17 79869815 79879199 +MAFG chr17 79876146 79885590 +ASPSCR1 chr17 79934683 79975282 +STRA13 chr17 79976578 79981983 +RAC3 chr17 79989500 79992080 +DCXR chr17 79993012 79995608 +RFNG chr17 80005778 80009707 +GPS1 chr17 80008567 80015346 +FASN chr17 80036214 80056208 +CSNK1D chr17 80196899 80231607 +SECTM1 chr17 80278900 80291950 +OGFOD3 chr17 80347099 80376513 +HEXDC chr17 80376194 80400521 +C17orf62 chr17 80400465 80408705 +NARF chr17 80416056 
80448413 +FOXK2 chr17 80477589 80602538 +WDR45B chr17 80572438 80606429 +RAB40B chr17 80612849 80656604 +FN3KRP chr17 80674559 80688204 +TBCD chr17 80709940 80900724 +B3GNTL1 chr17 80900031 81009686 +ROCK1P1 chr18 109065 122219 +USP14 chr18 158383 214629 +THOC1 chr18 214520 268050 +CETN1 chr18 580367 581524 +C18orf56 chr18 641320 658340 +TYMS chr18 657604 673578 +YES1 chr18 721588 812547 +METTL4 chr18 2537524 2571508 +CBX3P2 chr18 2652169 2655394 +SMCHD1 chr18 2655737 2805015 +LPIN2 chr18 2916992 3013313 +MYL12A chr18 3247479 3256234 +MYL12B chr18 3261907 3278282 +TGIF1 chr18 3411606 3458409 +DLGAP1 chr18 3496030 4455335 +DLGAP1-AS3 chr18 3878180 3897069 +LINC00667 chr18 5232875 5246507 +LINC00526 chr18 5236723 5238597 +EPB41L3 chr18 5392383 5630699 +ARHGAP28 chr18 6729717 6915715 +PTPRM chr18 7566780 8406859 +NDUFV2 chr18 9102628 9134343 +ANKRD12 chr18 9136226 9285983 +TWSG1 chr18 9334765 9402418 +RALBP1 chr18 9475007 9538114 +PPP4R1 chr18 9546789 9615238 +RAB31 chr18 9708162 9862548 +VAPA chr18 9913999 9960018 +APCDD1 chr18 10454625 10489945 +NAPG chr18 10525902 10552758 +GNAL chr18 11688955 11885684 +CHMP1B chr18 11851395 11854448 +MPPE1 chr18 11882621 11909222 +AFG3L2 chr18 12328943 12377313 +SPIRE1 chr18 12446511 12658133 +PSMG2 chr18 12658737 12725739 +CEP76 chr18 12661832 12702776 +PTPN2 chr18 12785477 12929642 +SEH1L chr18 12947132 12987535 +CEP192 chr18 12991361 13125051 +LDLRAD4 chr18 13217497 13652754 +FAM210A chr18 13663346 13726662 +RNMT chr18 13726659 13764557 +ZNF519 chr18 14057456 14132489 +ANKRD20A5P chr18 14179096 14249455 +ROCK1 chr18 18526867 18691812 +ESCO1 chr18 19109242 19180845 +SNRPD1 chr18 19192228 19210417 +ABHD3 chr18 19230858 19284766 +MIB1 chr18 19284918 19450918 +RBBP8 chr18 20378224 20606451 +CABLES1 chr18 20714528 20840431 +TMEM241 chr18 20777108 21017925 +RIOK3 chr18 21032787 21066567 +C18orf8 chr18 21083473 21111746 +NPC1 chr18 21086148 21166862 +TTC39C chr18 21572737 21715574 +CABYR chr18 21718942 21741567 +OSBPL1A chr18 21742008 
21977844 +MIR320C2 chr18 21901650 21901699 +IMPACT chr18 22006580 22033499 +ZNF521 chr18 22641890 22932154 +SS18 chr18 23596578 23671181 +CHST9 chr18 24495595 24765281 +CDH2 chr18 25530930 25757410 +B4GALT6 chr18 29202210 29265799 +TRAPPC8 chr18 29409136 29533099 +RNF138 chr18 29671818 29711524 +ASXL3 chr18 31158579 31331156 +DTNA chr18 32073254 32471808 +MAPRE2 chr18 32556892 32723434 +ZNF397 chr18 32820994 32847097 +ZSCAN30 chr18 32831032 32870196 +ZNF271 chr18 32870246 32890730 +ZNF24 chr18 32912176 32925446 +INO80C chr18 33032194 33077955 +GALNT1 chr18 33161081 33291798 +C18orf21 chr18 33552046 33559241 +RPRD1A chr18 33564350 33647539 +SLC39A6 chr18 33688495 33709348 +ELP2 chr18 33709407 33757909 +TPGS2 chr18 34359987 34409158 +KIAA1328 chr18 34409069 34812135 +CELF4 chr18 34823010 35146000 +PIK3C3 chr18 39535171 39667794 +SETBP1 chr18 42260138 42648475 +SLC14A1 chr18 43304092 43332485 +EPG5 chr18 43427574 43547240 +ATP5A1 chr18 43664110 43684300 +HAUS1 chr18 43684298 43708299 +C18orf25 chr18 43753519 43846949 +PIAS2 chr18 44388353 44500123 +HDHD2 chr18 44633774 44676891 +IER3IP1 chr18 44661027 44702652 +SMAD2 chr18 45357922 45457515 +CTIF chr18 46065417 46389588 +SMAD7 chr18 46446223 46477081 +DYM chr18 46570039 46987717 +C18orf32 chr18 47008028 47013622 +RPL17-C18orf32 chr18 47008051 47017956 +RPL17 chr18 47014851 47018906 +SNORD58A chr18 47017653 47017717 +ACAA2 chr18 47309869 47340330 +MBD1 chr18 47793252 47808144 +CXXC1 chr18 47808713 47814674 +MRO chr18 48324574 48351772 +ME2 chr18 48405419 48474691 +ELAC1 chr18 48494361 48514491 +SMAD4 chr18 48494410 48611415 +MEX3C chr18 48700920 48744674 +DCC chr18 49866542 51057784 +POLI chr18 51795774 51847636 +C18orf54 chr18 51884287 51911588 +TCF4 chr18 52889562 53332018 +TXNL1 chr18 54264439 54318831 +WDR7 chr18 54318574 54698828 +ST8SIA3 chr18 55018044 55038962 +FECH chr18 55215515 55254004 +NARS chr18 55267888 55289445 +ZNF532 chr18 56529832 56653712 +SEC11C chr18 56806709 56826068 +LMAN1 chr18 56995055 57027194 
+PMAIP1 chr18 57567180 57571538 +CDH20 chr18 59000815 59223006 +PIGN chr18 59710800 59854351 +KIAA1468 chr18 59854491 59974355 +PHLPP1 chr18 60382672 60647666 +KDSR chr18 60994959 61034743 +VPS4B chr18 61056423 61089716 +SERPINB5 chr18 61143994 61172318 +SERPINB8 chr18 61637159 61672278 +DSEL chr18 65173819 65184217 +TMX3 chr18 66340925 66382535 +RTTN chr18 67671029 67873181 +SOCS6 chr18 67956137 67997436 +TIMM21 chr18 71815746 71826197 +CYB5A chr18 71920530 71959251 +FAM69C chr18 72102963 72125179 +CNDP2 chr18 72163051 72188366 +CNDP1 chr18 72201675 72254448 +ZNF407 chr18 72265106 72777627 +ZADH2 chr18 72907063 72921303 +TSHZ1 chr18 72922710 73001905 +ZNF516 chr18 74069644 74207146 +MBP chr18 74690783 74845639 +ATP9B chr18 76829285 77138283 +TXNL4A chr18 77732867 77793949 +RBFA chr18 77794358 77806397 +WASH5P chr19 60105 71626 +PPAP2C chr19 281040 291393 +MIER2 chr19 305575 344798 +SHC2 chr19 416583 460996 +TPGS1 chr19 507299 519654 +BSG chr19 571297 583493 +POLRMT chr19 617223 633597 +RNF126 chr19 647526 663277 +PALM chr19 708953 748329 +PTBP1 chr19 797075 812327 +MED16 chr19 867962 893218 +R3HDM4 chr19 896503 913240 +WDR18 chr19 983174 994569 +TMEM259 chr19 1009647 1021117 +HMHA1 chr19 1065922 1086627 +POLR2E chr19 1086594 1095598 +GPX4 chr19 1103936 1106787 +ATP5D chr19 1241749 1244824 +MIDN chr19 1248552 1259142 +CIRBP chr19 1259384 1274879 +C19orf24 chr19 1275026 1279248 +MUM1 chr19 1285890 1378430 +EFNA2 chr19 1286153 1301430 +NDUFS7 chr19 1383526 1395583 +GAMT chr19 1397091 1401569 +RPS15 chr19 1438358 1440583 +APC2 chr19 1446300 1473243 +C19orf25 chr19 1461142 1479555 +PLK5 chr19 1524073 1535455 +MBD3 chr19 1576639 1592882 +UQCR11 chr19 1578338 1605444 +TCF3 chr19 1609291 1652604 +SCAMP4 chr19 1905213 1926016 +BTBD2 chr19 1985447 2034880 +MOB3A chr19 2071037 2096672 +AP3D1 chr19 2100988 2164464 +PLEKHJ1 chr19 2230002 2237703 +SF3A2 chr19 2236520 2248678 +OAZ1 chr19 2269485 2273487 +LSM7 chr19 2321516 2328619 +SPPL2B chr19 2328614 2355099 +TIMM13 chr19 
2425622 2427892 +LMNB2 chr19 2427636 2456994 +GADD45B chr19 2476120 2478257 +GNG7 chr19 2511217 2702707 +SLC39A3 chr19 2732202 2740150 +SGTA chr19 2754712 2783369 +THOP1 chr19 2785458 2813599 +ZNF554 chr19 2819872 2835771 +ZNF555 chr19 2841433 2860482 +ZNF57 chr19 2900896 2918474 +TLE2 chr19 2997636 3047633 +AES chr19 3052908 3063105 +S1PR4 chr19 3172344 3180329 +NCLN chr19 3185561 3209573 +NFIC chr19 3359561 3469215 +DOHH chr19 3490819 3500938 +FZR1 chr19 3506271 3538328 +HMG20B chr19 3572775 3579086 +GIPC3 chr19 3585551 3593539 +APBA3 chr19 3750817 3761697 +MRPL54 chr19 3762662 3768573 +ZFR2 chr19 3804022 3869030 +ATCAY chr19 3879862 3928077 +DAPK3 chr19 3958451 3971121 +EEF2 chr19 3976054 3985467 +PIAS4 chr19 4007644 4039384 +MAP2K2 chr19 4090319 4124126 +SIRT6 chr19 4174106 4182601 +CCDC94 chr19 4247076 4269087 +FSD1 chr19 4304597 4323840 +SH3GL1 chr19 4360367 4400544 +CHAF1A chr19 4402659 4445015 +UBXN6 chr19 4444996 4457819 +TNFAIP8L1 chr19 4639530 4655580 +C19orf10 chr19 4641386 4670382 +DPP9 chr19 4675236 4724685 +FEM1A chr19 4791728 4795571 +PLIN3 chr19 4838353 4867780 +UHRF1 chr19 4903092 4962165 +PTPRS chr19 5158506 5340814 +SAFB2 chr19 5587010 5624057 +SAFB chr19 5623046 5668489 +RPL36 chr19 5674958 5691887 +C19orf70 chr19 5678432 5680907 +LONP1 chr19 5691845 5720583 +PRR22 chr19 5782971 5784776 +DUS3L chr19 5784843 5791249 +NDUFA11 chr19 5891287 5904017 +CAPS chr19 5911718 5915888 +RANBP3 chr19 5916150 5978153 +RFX2 chr19 5993175 6199583 +CLPP chr19 6361463 6368919 +ALKBH7 chr19 6372444 6375042 +GTF2F1 chr19 6379580 6393992 +SLC25A41 chr19 6426048 6433790 +SLC25A23 chr19 6436090 6465214 +TUBB4A chr19 6494330 6502859 +C3 chr19 6677715 6730573 +GPR108 chr19 6729925 6737614 +TRIP10 chr19 6737936 6751537 +ZNF557 chr19 7069455 7087979 +INSR chr19 7112266 7294045 +ARHGEF18 chr19 7459999 7537363 +PEX11G chr19 7541761 7562335 +MCOLN1 chr19 7587512 7598895 +PNPLA6 chr19 7598890 7626650 +XAB2 chr19 7684411 7694451 +PET100 chr19 7694623 7696842 +PCP2 chr19 
7696497 7698634 +TRAPPC5 chr19 7745729 7747744 +EVI5L chr19 7895119 7929862 +TIMM44 chr19 7991603 8008805 +ELAVL1 chr19 8023463 8070543 +CERS4 chr19 8271620 8327305 +CD320 chr19 8367011 8373240 +NDUFA7 chr19 8373167 8386263 +RPS28 chr19 8386042 8388224 +RAB11B chr19 8454865 8469318 +MARCH2 chr19 8478154 8503901 +HNRNPM chr19 8509651 8553998 +ZNF414 chr19 8575462 8579048 +ZNF558 chr19 8920380 8943004 +OR1M1 chr19 9203855 9204889 +ZNF317 chr19 9251056 9274100 +ZNF559 chr19 9434448 9461838 +ZNF177 chr19 9435021 9493293 +ZNF266 chr19 9523272 9546254 +ZNF426 chr19 9638667 9649303 +ZNF121 chr19 9671029 9695209 +ZNF561 chr19 9715356 9732075 +ZNF562 chr19 9759330 9785776 +ZNF812 chr19 9800600 9811452 +FBXL12 chr19 9920943 9938492 +UBL5 chr19 9938568 9940791 +PIN1 chr19 9945933 9960358 +OLFM2 chr19 9964394 10047228 +C19orf66 chr19 10196798 10203928 +PPAN chr19 10216965 10225414 +SNORD105 chr19 10218327 10218411 +EIF3G chr19 10225693 10230596 +DNMT1 chr19 10244021 10341962 +S1PR2 chr19 10334520 10341948 +MRPL4 chr19 10362577 10370721 +ICAM1 chr19 10381511 10397291 +ICAM5 chr19 10400657 10407454 +FDX1L chr19 10416103 10426691 +RAVER1 chr19 10426888 10444316 +TYK2 chr19 10461209 10491352 +CDC37 chr19 10501810 10530797 +KEAP1 chr19 10596796 10614417 +S1PR5 chr19 10623623 10628607 +ATG4D chr19 10654571 10664094 +KRI1 chr19 10663761 10676713 +SLC44A2 chr19 10713133 10755235 +ILF3-AS1 chr19 10762538 10764520 +ILF3 chr19 10764937 10803093 +QTRT1 chr19 10812106 10824113 +DNM2 chr19 10828755 10944164 +TMED1 chr19 10943114 10946994 +YIPF2 chr19 11033444 11039357 +C19orf52 chr19 11039409 11044211 +SMARCA4 chr19 11071598 11176071 +LDLR chr19 11200038 11244492 +TMEM205 chr19 11453452 11457194 +CCDC159 chr19 11455360 11465620 +SWSAP1 chr19 11485361 11487627 +EPOR chr19 11488236 11495018 +PRKCSH chr19 11546109 11561783 +ECSIT chr19 11616731 11639989 +ELOF1 chr19 11661962 11670051 +ZNF627 chr19 11670189 11729950 +ACP5 chr19 11685475 11689823 +HNRNPA1P10 chr19 11776884 11777845 +ZNF823 chr19 
11832080 11849824 +ZNF441 chr19 11877815 11894893 +ZNF491 chr19 11908482 11920437 +ZNF440 chr19 11925099 11946016 +ZNF439 chr19 11959541 11994565 +ZNF700 chr19 12035883 12061588 +ZNF763 chr19 12035890 12091196 +ZNF433 chr19 12125547 12146556 +ZNF844 chr19 12175514 12192380 +ZNF136 chr19 12273879 12300064 +ZNF44 chr19 12335501 12405702 +ZNF563 chr19 12428291 12444534 +ZNF799 chr19 12500830 12512085 +ZNF443 chr19 12540521 12551926 +ZNF709 chr19 12571998 12624668 +ZNF564 chr19 12636184 12662356 +ZNF490 chr19 12688775 12750912 +ZNF791 chr19 12721732 12742735 +MAN2B1 chr19 12757325 12777556 +WDR83 chr19 12777614 12786646 +WDR83OS chr19 12778885 12782170 +DHPS chr19 12786531 12792716 +TNPO2 chr19 12810008 12834825 +C19orf43 chr19 12841454 12845589 +ASNA1 chr19 12847953 12859137 +HOOK2 chr19 12873817 12983554 +JUNB chr19 12902310 12904124 +PRDX2 chr19 12907634 12912694 +RNASEH2A chr19 12917394 12924452 +DNASE2 chr19 12986025 12992282 +GCDH chr19 13001840 13025021 +FARSA chr19 13033293 13044851 +CALR chr19 13049392 13055303 +RAD23A chr19 13056669 13064456 +GADD45GIP1 chr19 13064972 13068050 +DAND5 chr19 13075973 13085567 +NFIX chr19 13106422 13209610 +TRMT1 chr19 13215716 13228381 +STX10 chr19 13254872 13261197 +IER2 chr19 13261229 13265722 +CACNA1A chr19 13317256 13734804 +CCDC130 chr19 13842574 13874110 +MRI1 chr19 13875346 13885096 +C19orf53 chr19 13884982 13889276 +MIR181D chr19 13985689 13985825 +C19orf57 chr19 13993161 14017265 +PRKACA chr19 14202500 14228896 +LPHN1 chr19 14260750 14316999 +CD97 chr19 14491313 14519537 +DDX39A chr19 14519631 14530192 +PKN1 chr19 14543865 14582679 +GIPC1 chr19 14588572 14606944 +DNAJB1 chr19 14625582 14640582 +TECR chr19 14627897 14676792 +MIR639 chr19 14640355 14640452 +NDUFB7 chr19 14676890 14682874 +ZNF333 chr19 14800613 14844558 +OR7C2 chr19 15052301 15053260 +SLC1A6 chr19 15060846 15133802 +ILVBL chr19 15225795 15236596 +AKAP8 chr19 15464196 15490609 +AKAP8L chr19 15490859 15529952 +OR10H2 chr19 15838834 15839862 +TPM4 chr19 
16177831 16213813 +RAB8A chr19 16222439 16245044 +FAM32A chr19 16296191 16302857 +AP1M1 chr19 16308389 16346160 +EPS15L1 chr19 16466050 16582896 +CHERP chr19 16628700 16653341 +SLC35E1 chr19 16660642 16683193 +SMIM7 chr19 16741562 16771253 +HAUS8 chr19 17160539 17186435 +MYO9B chr19 17186591 17325346 +USE1 chr19 17326155 17330638 +BABAM1 chr19 17378159 17392058 +MRPL34 chr19 17403418 17417652 +DDA1 chr19 17420327 17434110 +ANO8 chr19 17434032 17445638 +GTPBP3 chr19 17445729 17453544 +PLVAP chr19 17462257 17488159 +BST2 chr19 17513748 17516457 +MVB12A chr19 17516531 17544533 +SLC27A1 chr19 17579578 17616977 +PGLS chr19 17622438 17632097 +UNC13A chr19 17712137 17799401 +MAP1S chr19 17830051 17845325 +FCHO1 chr19 17858527 17899377 +RPL18A chr19 17970685 17974962 +CCDC124 chr19 18043825 18054800 +ARRDC2 chr19 18111941 18124911 +MAST3 chr19 18208603 18262502 +PIK3R2 chr19 18263968 18288927 +IFI30 chr19 18283972 18288927 +MPV17L2 chr19 18303992 18307758 +RAB3A chr19 18307594 18314884 +KIAA1683 chr19 18367908 18385319 +JUND chr19 18390563 18392432 +MIR3188 chr19 18392887 18392971 +LSM4 chr19 18417040 18434084 +ELL chr19 18553473 18632937 +FKBP8 chr19 18642561 18654887 +KXD1 chr19 18668572 18680197 +UBA52 chr19 18682540 18688360 +C19orf60 chr19 18699495 18703146 +TMEM59L chr19 18718240 18731849 +CRTC1 chr19 18794487 18893004 +CERS1 chr19 18979361 19007536 +COPE chr19 19010323 19030206 +DDX49 chr19 19030484 19039436 +HOMER3 chr19 19040010 19052070 +SUGP2 chr19 19101697 19144832 +ARMC6 chr19 19144384 19170563 +SLC25A42 chr19 19174808 19223697 +TMEM161A chr19 19229978 19249322 +MEF2BNB chr19 19287712 19303400 +RFXANK chr19 19303008 19312678 +NR2C2AP chr19 19312225 19314233 +NCAN chr19 19322782 19363042 +SUGP1 chr19 19386827 19431653 +MAU2 chr19 19431490 19469563 +NDUFA13 chr19 19626545 19644285 +YJEFN3 chr19 19627036 19648390 +LPAR2 chr19 19734477 19739739 +ATP13A1 chr19 19756007 19774502 +ZNF14 chr19 19821280 19843906 +ZNF506 chr19 19896648 19932560 +ZNF253 chr19 19976695 
20005483 +ZNF93 chr19 20011722 20046384 +ZNF682 chr19 20107867 20150315 +ZNF737 chr19 20718631 20748615 +ZNF626 chr19 20802867 20844402 +ZNF85 chr19 21106028 21133503 +ZNF430 chr19 21203426 21242856 +ZNF714 chr19 21264965 21308073 +ZNF431 chr19 21324827 21373034 +ZNF708 chr19 21473962 21512227 +ZNF738 chr19 21541732 21562104 +ZNF493 chr19 21579921 21610375 +ZNF429 chr19 21679484 21739072 +ZNF100 chr19 21905568 21950430 +ZNF43 chr19 21987752 22034927 +ZNF676 chr19 22361893 22379753 +ZNF724P chr19 23404401 23433192 +ZNF91 chr19 23487793 23578362 +ZNF675 chr19 23708433 23870022 +ZNF681 chr19 23921997 23941693 +RPSAP58 chr19 23945807 24010937 +ZNF254 chr19 24216276 24312654 +UQCRFS1 chr19 29698173 29704448 +POP4 chr19 30094924 30108144 +ZNF536 chr19 30719197 31204445 +DPY19L3 chr19 32896449 32976801 +PDCD5 chr19 33071974 33078358 +ANKRD27 chr19 33087913 33167503 +NUDT19 chr19 33182867 33204702 +GPATCH1 chr19 33571786 33621448 +LRP3 chr19 33668509 33698694 +CEBPG chr19 33864236 33873592 +PEPD chr19 33877856 34012700 +LSM14A chr19 34663409 34720420 +KIAA0355 chr19 34745442 34846491 +GPI chr19 34850385 34893061 +PDCD2L chr19 34895289 34917073 +UBA2 chr19 34919257 34960853 +ZNF302 chr19 35168544 35177302 +ZNF181 chr19 35225060 35233777 +ZNF30 chr19 35417807 35436074 +GRAMD1A chr19 35485688 35517375 +FXYD7 chr19 35634154 35645204 +FXYD5 chr19 35645633 35660786 +FAM187B chr19 35715703 35719632 +USF2 chr19 35759881 35770724 +MAG chr19 35783028 35804707 +KRTDAP chr19 35978226 35986460 +TMEM147 chr19 36036497 36038428 +RBM42 chr19 36119932 36128588 +COX6B1 chr19 36139125 36149763 +UPK1A chr19 36157715 36169367 +IGFLR1 chr19 36230058 36233354 +U2AF1L4 chr19 36233365 36236346 +PSENEN chr19 36236015 36237911 +LIN37 chr19 36239262 36245420 +ARHGAP33 chr19 36265434 36279724 +PRODH2 chr19 36290890 36304201 +APLP1 chr19 36358801 36370693 +NFKBID chr19 36378555 36393205 +HCST chr19 36393382 36395279 +TYROBP chr19 36395303 36399197 +SDHAF1 chr19 36486090 36487220 +ALKBH6 chr19 36500022 
36505141 +CLIP3 chr19 36505562 36524245 +THAP8 chr19 36525887 36545664 +POLR2I chr19 36604612 36606248 +TBCB chr19 36605191 36616849 +CAPNS1 chr19 36630477 36641255 +ZNF565 chr19 36673188 36737159 +ZNF146 chr19 36705504 36729676 +LINC00665 chr19 36803969 36822620 +ZFP82 chr19 36874022 36909558 +ZNF566 chr19 36936021 36980804 +ZNF260 chr19 37001597 37019562 +ZNF529 chr19 37025676 37096178 +ZNF567 chr19 37178514 37218603 +ZNF850 chr19 37205285 37263727 +ZNF790 chr19 37308330 37341689 +ZNF345 chr19 37341263 37403931 +ZNF568 chr19 37407231 37489602 +ZNF420 chr19 37498759 37621216 +ZNF585A chr19 37597636 37663643 +ZNF383 chr19 37708828 37734828 +HKR1 chr19 37803739 37860267 +ZNF527 chr19 37861963 37883968 +ZNF570 chr19 37958487 37976260 +ZNF793 chr19 37997841 38039664 +ZNF540 chr19 38042308 38104998 +ZNF571 chr19 38045684 38085673 +ZNF573 chr19 38226734 38307940 +WDR87 chr19 38375463 38397317 +SPINT2 chr19 38734675 38783254 +PPP1R14A chr19 38741877 38747231 +YIF1B chr19 38795753 38807913 +PSMD8 chr19 38865176 38874464 +GGN chr19 38874905 38878722 +FAM98C chr19 38893775 38899728 +EIF3K chr19 39109735 39127595 +ACTN4 chr19 39138289 39222223 +LGALS7B chr19 39279851 39282389 +ECH1 chr19 39306062 39322645 +HNRNPL chr19 39327028 39342987 +SIRT2 chr19 39369197 39390502 +SARS2 chr19 39405906 39440495 +MRPS12 chr19 39421188 39423802 +FBXO27 chr19 39481354 39523425 +GMFG chr19 39818993 39833012 +PAF1 chr19 39876492 39881835 +MED29 chr19 39881943 39891277 +ZFP36 chr19 39897453 39900052 +PLEKHG2 chr19 39903225 39919054 +RPS16 chr19 39923847 39926588 +SUPT5H chr19 39926796 39967310 +TIMM50 chr19 39971052 39984422 +DLL3 chr19 39989535 39999121 +EID2B chr19 40021630 40023494 +DYRK1B chr19 40315990 40324841 +FBL chr19 40325098 40337054 +PSMC4 chr19 40476912 40487348 +ZNF780A chr19 40575059 40596845 +AKT2 chr19 40736224 40791443 +PLD3 chr19 40854363 40886346 +SERTAD1 chr19 40927499 40931932 +SERTAD3 chr19 40946751 40950612 +BLVRB chr19 40953696 40971747 +SHKBP1 chr19 41082757 41097305 
+LTBP4 chr19 41098789 41135725 +ADCK4 chr19 41197434 41224112 +SNRPA chr19 41256542 41271294 +RAB4B chr19 41284121 41302847 +RAB4B-EGLN2 chr19 41284147 41314103 +EGLN2 chr19 41304901 41314338 +AXL chr19 41725108 41767671 +HNRNPUL1 chr19 41768401 41813503 +CCDC97 chr19 41816094 41830788 +BCKDHA chr19 41884215 41930910 +EXOSC5 chr19 41892279 41903384 +ATP5SL chr19 41937223 41946622 +C19orf69 chr19 41949063 41950670 +RPS19 chr19 42363988 42376994 +CD79A chr19 42381190 42385439 +ARHGEF1 chr19 42387228 42434302 +RABAC1 chr19 42460833 42463542 +ZNF574 chr19 42572629 42585701 +DEDD2 chr19 42702750 42724292 +ZNF526 chr19 42724423 42732353 +GSK3A chr19 42734338 42746777 +PAFAH1B3 chr19 42801185 42807698 +CEACAM1 chr19 43011456 43065386 +PSG5 chr19 43670408 43690688 +ETHE1 chr19 44010871 44031396 +XRCC1 chr19 44047192 44084625 +PINLYP chr19 44080952 44088116 +IRGQ chr19 44088521 44100287 +ZNF576 chr19 44100544 44105309 +ZNF428 chr19 44111371 44124026 +PLAUR chr19 44150247 44174699 +SMG9 chr19 44235301 44259142 +LYPD5 chr19 44300081 44331358 +ZNF404 chr19 44376515 44405537 +ZNF221 chr19 44455375 44471861 +ZNF224 chr19 44598492 44612479 +ZNF225 chr19 44616334 44637027 +ZNF234 chr19 44645710 44663156 +ZNF226 chr19 44669226 44682534 +ZNF227 chr19 44711700 44741421 +ZNF285 chr19 44886459 44905774 +CEACAM20 chr19 45005729 45033811 +PVR chr19 45147098 45166850 +TOMM40 chr19 45393826 45406946 +APOE chr19 45409011 45412650 +APOC1 chr19 45417504 45422606 +APOC2 chr19 45449243 45452822 +CLPTM1 chr19 45457842 45496599 +ZNF296 chr19 45574758 45579846 +GEMIN7 chr19 45582453 45594782 +TRAPPC6A chr19 45666186 45681495 +BLOC1S3 chr19 45682003 45685059 +ERCC2 chr19 45853095 45874176 +ERCC1 chr19 45910591 45982086 +FOSB chr19 45971253 45978437 +EML2 chr19 46110252 46148887 +SNRPD2 chr19 46190712 46195827 +QPCTL chr19 46195741 46207247 +DMPK chr19 46272975 46285810 +RSPH6A chr19 46298968 46318577 +SYMPK chr19 46318668 46366548 +IGFL4 chr19 46543006 46580376 +HIF3A chr19 46800303 46846690 +PPP5C 
chr19 46850251 46896238 +PNMAL1 chr19 46969748 46974820 +CALM3 chr19 47104331 47114050 +SLC1A5 chr19 47278140 47291851 +AP2S1 chr19 47341393 47354249 +ARHGAP35 chr19 47421933 47508334 +TMEM160 chr19 47549165 47551888 +SAE1 chr19 47616531 47713886 +C5AR1 chr19 47793280 47825323 +KPTN chr19 47978401 47987525 +NAPA chr19 47990894 48018497 +EHD2 chr19 48216600 48246391 +GLTSCR2 chr19 48248779 48260315 +SEPW1 chr19 48281829 48287943 +PLA2G4C chr19 48551100 48614074 +LIG1 chr19 48618702 48673860 +ZNF114 chr19 48675575 48790865 +CARD8 chr19 48684027 48759203 +KDELR1 chr19 48885827 48894810 +GRWD1 chr19 48949030 48960279 +KCNJ14 chr19 48958766 48969367 +CYTH2 chr19 48972289 48985571 +RPL18 chr19 49118585 49122793 +SPHK2 chr19 49122548 49133974 +SEC1P chr19 49141328 49184461 +BCAT2 chr19 49298319 49314286 +HSD17B14 chr19 49316274 49339935 +PLEKHA4 chr19 49340354 49371889 +PPP1R15A chr19 49375649 49379314 +NUCB1 chr19 49403307 49426629 +BAX chr19 49458072 49465055 +FTL chr19 49468558 49470135 +GYS1 chr19 49471382 49496567 +RUVBL2 chr19 49496705 49519252 +CGB chr19 49526126 49527590 +SNRNP70 chr19 49588676 49611862 +LIN7B chr19 49617581 49621717 +C19orf73 chr19 49621655 49622397 +CD37 chr19 49838428 49846592 +PTH2 chr19 49925671 49926698 +PIH1D1 chr19 49949555 49956754 +RPL13A chr19 49990811 49995565 +RPS11 chr19 49999622 50002946 +SNORD35B chr19 50000977 50001063 +FCGRT chr19 50010073 50029590 +NOSIP chr19 50058968 50093519 +IRF3 chr19 50162826 50169132 +BCL2L12 chr19 50168823 50177173 +PRMT1 chr19 50179043 50192286 +CPT1C chr19 50194155 50216988 +AP2A1 chr19 50270225 50310370 +FUZ chr19 50310126 50320633 +PTOV1 chr19 50353992 50364001 +PNKP chr19 50364461 50371166 +TBC1D17 chr19 50380682 50392005 +NUP62 chr19 50410082 50433020 +SIGLEC11 chr19 50452242 50464429 +VRK3 chr19 50479724 50529203 +NR1H2 chr19 50832910 50886239 +NAPSB chr19 50837053 50848024 +POLD1 chr19 50887461 50921273 +JOSD2 chr19 51009255 51014610 +ASPDH chr19 51014857 51017947 +SHANK1 chr19 51165084 51222707 
+C19orf48 chr19 51300961 51308186 +KLK3 chr19 51358171 51364020 +KLK6 chr19 51461887 51472929 +KLK11 chr19 51525472 51531295 +IGLON5 chr19 51815102 51833608 +ETFB chr19 51848423 51869672 +SIGLEC10 chr19 51913275 51921057 +SIGLEC8 chr19 51954101 51961710 +SIGLEC6 chr19 52022779 52035110 +FPR1 chr19 52248425 52307363 +FPR2 chr19 52255279 52273779 +ZNF577 chr19 52359055 52394203 +ZNF649 chr19 52392477 52408293 +ZNF350 chr19 52467596 52490109 +ZNF614 chr19 52516018 52533493 +ZNF616 chr19 52616344 52643175 +PPP2R1A chr19 52693292 52730687 +ZNF766 chr19 52772824 52795977 +ZNF480 chr19 52800430 52829175 +ZNF610 chr19 52839498 52871031 +ZNF880 chr19 52873170 52889048 +ZNF528 chr19 52901102 52921665 +ZNF578 chr19 52956829 53015407 +ZNF83 chr19 53097313 53193749 +ZNF816-ZNF321P chr19 53430388 53445854 +ZNF160 chr19 53569859 53606689 +ZNF415 chr19 53611132 53636330 +ZNF818P chr19 53716241 53716649 +TPM3P9 chr19 53935237 53947923 +ZNF331 chr19 54024235 54083523 +MIR519E chr19 54183194 54183277 +MIR518F chr19 54203269 54203355 +MIR520D chr19 54223350 54223436 +MIR517C chr19 54244567 54244661 +MIR371B chr19 54290851 54291423 +MYADM chr19 54369477 54379691 +CACNG6 chr19 54495542 54515923 +NDUFA3 chr19 54606036 54612564 +TFPT chr19 54610320 54619055 +PRPF31 chr19 54618837 54635140 +LENG1 chr19 54658899 54663620 +MBOAT7 chr19 54677107 54693733 +TSEN34 chr19 54693789 54697585 +RPS9 chr19 54704610 54752862 +LILRA4 chr19 54844456 54850421 +LAIR1 chr19 54865362 54882165 +TTYH1 chr19 54926393 54948080 +LENG8 chr19 54960065 54973217 +LILRA1 chr19 55105047 55113555 +LILRB4 chr19 55155340 55181810 +FCAR chr19 55385704 55401838 +RDH13 chr19 55550476 55582659 +DNAAF3 chr19 55670031 55678090 +HSPBP1 chr19 55773599 55791749 +FAM71E2 chr19 55866276 55874628 +RPL28 chr19 55896713 55914612 +SHISA7 chr19 55940107 55954230 +ISOC2 chr19 55964352 55973710 +SSC5D chr19 55999771 56030465 +ZNF580 chr19 56146382 56154835 +ZNF581 chr19 56146825 56156988 +U2AF2 chr19 56165512 56186081 +ZNF444 chr19 
56643968 56672262 +ZNF542 chr19 56879468 56891197 +ZNF583 chr19 56909335 56947404 +ZNF667 chr19 56950694 56989434 +ZNF471 chr19 57019212 57041590 +ZFP28 chr19 57050317 57068169 +ZNF71 chr19 57106632 57137849 +DUXA chr19 57665389 57678811 +ZNF805 chr19 57751973 57766503 +ZNF460 chr19 57791419 57804937 +ZNF543 chr19 57831877 57842144 +TRAPPC2P1 chr19 57874879 57876721 +ZNF548 chr19 57901218 57912786 +ZNF17 chr19 57922531 57933307 +ZNF772 chr19 57978031 57988938 +ZNF773 chr19 58011283 58029772 +ZNF549 chr19 58038693 58068910 +ZNF550 chr19 58046625 58071231 +ZNF530 chr19 58111253 58124090 +ZNF134 chr19 58125601 58134724 +ZNF211 chr19 58141761 58154147 +ZNF586 chr19 58281023 58331307 +ZNF587B chr19 58331099 58370018 +ZNF814 chr19 58360099 58400442 +ZNF587 chr19 58361225 58376480 +ZNF418 chr19 58433252 58446761 +ZNF606 chr19 58488421 58514717 +ZNF135 chr19 58570607 58597677 +ZSCAN18 chr19 58595205 58629794 +ZNF274 chr19 58694396 58724928 +ZNF544 chr19 58739960 58788815 +ZNF8 chr19 58790318 58807254 +ZSCAN22 chr19 58838385 58853698 +RPS5 chr19 58897767 58906173 +ZNF584 chr19 58912871 58929694 +ZNF324 chr19 58978423 58984945 +ZNF446 chr19 58985384 58992597 +TRIM28 chr19 59055458 59062089 +CHMP2A chr19 59062933 59066491 +MZF1 chr19 59073298 59084942 +C20orf96 chr20 251504 271390 +ZCCHC3 chr20 277737 280965 +NRSN2 chr20 327426 340304 +RBCK1 chr20 388142 411610 +TBC1D20 chr20 416124 443197 +CSNK2A1 chr20 459116 524465 +SLC52A3 chr20 740724 749131 +FAM110A chr20 814358 838106 +PSMF1 chr20 1093906 1160596 +SNPH chr20 1246960 1289972 +SDCBP2-AS1 chr20 1306049 1359378 +FKBP1A chr20 1349622 1373806 +NSFL1C chr20 1422807 1454487 +SIRPA chr20 1875154 1920543 +TGM3 chr20 2276647 2321724 +SNRPB chr20 2442280 2451499 +ZNF343 chr20 2462463 2505348 +NOP56 chr20 2632791 2639039 +IDH3B chr20 2639041 2644865 +CPXM1 chr20 2774715 2781283 +PCED1A chr20 2815960 2821836 +VPS16 chr20 2821349 2847378 +PTPRA chr20 2844830 3019722 +MRPS26 chr20 3026591 3028900 +OXT chr20 3052266 3053163 +UBOX5 
chr20 3088219 3140842 +FASTKD5 chr20 3127165 3140543 +DDRGK1 chr20 3170996 3185331 +ITPA chr20 3189514 3204516 +C20orf194 chr20 3229951 3388272 +ATRN chr20 3451687 3631769 +GFRA4 chr20 3639939 3644046 +C20orf27 chr20 3734155 3749034 +CDC25B chr20 3767578 3786762 +AP5S1 chr20 3801178 3805949 +MAVS chr20 3827487 3849280 +PANK2 chr20 3869486 3907605 +RNF24 chr20 3907956 3996229 +SMOX chr20 4101627 4168394 +PRNP chr20 4666882 4682236 +RASSF2 chr20 4760669 4804291 +SLC23A2 chr20 4833002 4990939 +TMEM230 chr20 5080486 5093749 +PCNA chr20 5095599 5107272 +CDS2 chr20 5107432 5178533 +GPCPD1 chr20 5525085 5591672 +C20orf196 chr20 5731039 5844558 +CHGB chr20 5892076 5906007 +TRMT6 chr20 5917881 5931182 +MCM8 chr20 5931298 5975852 +CRLS1 chr20 5986736 6020699 +FERMT1 chr20 6055492 6104191 +BMP2 chr20 6748311 6760927 +TMX4 chr20 7957995 8000476 +PLCB1 chr20 8112824 8949003 +PAK7 chr20 9518036 9819689 +SNAP25 chr20 10199478 10288066 +MKKS chr20 10381657 10414870 +BTBD3 chr20 11871371 11907257 +ISM1-AS1 chr20 13218448 13220321 +TASP1 chr20 13246709 13619587 +ESF1 chr20 13694969 13765532 +NDUFAF5 chr20 13765596 13799067 +FLRT3 chr20 14303634 14318262 +SNRPB2 chr20 16710606 16722421 +OTOR chr20 16729003 16750707 +DSTN chr20 17550508 17590564 +RRBP1 chr20 17594323 17662940 +SNX5 chr20 17922241 17949623 +MGME1 chr20 17949556 17971765 +PET117 chr20 18118517 18123813 +CSRP2BP chr20 18118762 18169031 +ZNF133 chr20 18269121 18297640 +POLR3F chr20 18447771 18465287 +RBBP9 chr20 18467184 18477887 +SEC23B chr20 18488137 18542059 +LINC00493 chr20 18548064 18550207 +DTD1 chr20 18568537 18744561 +RIN2 chr20 19867165 19983101 +NAA20 chr20 19997760 20014299 +CRNKL1 chr20 20015012 20036690 +RALGAPA2 chr20 20370196 20693131 +PLK1S1 chr20 21106624 21227260 +XRN2 chr20 21283942 21370463 +NKX2-2 chr20 21491648 21494664 +CD93 chr20 23059986 23066977 +NXT1 chr20 23331373 23335414 +GZF1 chr20 23342787 23353700 +NAPB chr20 23355159 23402125 +CST9 chr20 23583047 23586513 +CST3 chr20 23608534 23619110 
+SYNDIG1 chr20 24449835 24647252 +APMAP chr20 24943561 24973615 +ACSS1 chr20 24986868 25039616 +ENTPD6 chr20 25176329 25207365 +PYGB chr20 25228705 25278650 +ABHD12 chr20 25275379 25371619 +ZNF337 chr20 25654851 25677477 +FAM182A chr20 26035291 26073683 +FRG1B chr20 29611857 29634010 +DEFB122 chr20 30009239 30016983 +HM13 chr20 30102231 30157370 +ID1 chr20 30193086 30194318 +BCL2L1 chr20 30252255 30311792 +PDRG1 chr20 30532758 30539895 +HCK chr20 30639991 30689659 +TM9SF4 chr20 30697309 30755061 +POFUT1 chr20 30795683 30826470 +KIF3B chr20 30865467 30922814 +ASXL1 chr20 30946155 31027122 +C20orf112 chr20 31030862 31172876 +COMMD7 chr20 31290493 31331803 +MAPRE1 chr20 31407699 31438211 +BPIFB1 chr20 31861286 31897684 +CDK5RAP1 chr20 31946645 31989367 +CBFA2T2 chr20 32077881 32237842 +NECAB3 chr20 32244893 32262269 +PXMP4 chr20 32294512 32308125 +CHMP4B chr20 32399110 32442172 +RALY chr20 32581452 32696114 +EIF2S2 chr20 32676104 32700138 +AHCY chr20 32868074 32899608 +ITCH chr20 32951041 33099198 +DYNLRB1 chr20 33104214 33128762 +MAP1LC3A chr20 33134658 33148149 +PIGU chr20 33148346 33264910 +NCOA6 chr20 33284722 33413452 +TP53INP2 chr20 33292094 33301243 +GGT7 chr20 33432523 33460663 +ACSS2 chr20 33459949 33515769 +GSS chr20 33516236 33543620 +TRPC4AP chr20 33590207 33680674 +EDEM2 chr20 33703167 33865928 +MMP24 chr20 33814457 33864801 +EIF6 chr20 33866714 33872788 +CEP250 chr20 34042985 34099804 +ERGIC3 chr20 34129770 34145405 +CPNE1 chr20 34213953 34252878 +RBM12 chr20 34236847 34252878 +NFS1 chr20 34255977 34287281 +ROMO1 chr20 34287194 34288906 +RBM39 chr20 34291531 34330234 +PHF20 chr20 34359896 34538303 +SCAND1 chr20 34541539 34547394 +LINC00657 chr20 34633544 34638882 +EPB41L1 chr20 34679426 34820721 +AAR2 chr20 34824381 34858840 +DLGAP4 chr20 34894258 35157040 +C20orf24 chr20 35234137 35240960 +NDRG3 chr20 35280169 35374481 +SOGA1 chr20 35405845 35492089 +SAMHD1 chr20 35518632 35580246 +RBL1 chr20 35724188 35725581 +RPN2 chr20 35806813 35870022 +MANBAL chr20 
35918041 35945663 +SRC chr20 35973088 36034453 +BLCAP chr20 36120874 36156333 +CTNNBL1 chr20 36322408 36500531 +TTI1 chr20 36611409 36661870 +RPRD1B chr20 36661948 36720768 +KIAA1755 chr20 36838890 36889174 +SNORA71C chr20 37058313 37058446 +SNHG11 chr20 37075221 37079564 +RALGAPB chr20 37101459 37207504 +ACTR5 chr20 37377085 37400834 +PPP1R16B chr20 37434348 37551667 +DHX35 chr20 37590942 37668366 +MAFB chr20 39314488 39317880 +TOP1 chr20 39657458 39753127 +PLCG1 chr20 39765600 39811629 +ZHX3 chr20 39807088 39946312 +CHD6 chr20 40030741 40247133 +PTPRT chr20 40701392 41818610 +SRSF6 chr20 42086568 42092245 +L3MBTL1 chr20 42136320 42179590 +IFT52 chr20 42219571 42275936 +FITM2 chr20 42931478 42939809 +TTPAL chr20 43104526 43123244 +SERINC3 chr20 43124862 43150750 +PKIG chr20 43160426 43252888 +ADA chr20 43248163 43280874 +RIMS4 chr20 43380449 43438979 +YWHAB chr20 43514317 43537173 +PABPC1L chr20 43538703 43587676 +TOMM34 chr20 43570771 43589127 +STK4 chr20 43595115 43708600 +SEMG1 chr20 43835638 43838413 +SYS1 chr20 43990577 44005438 +DBNDD2 chr20 44034697 44039250 +PIGT chr20 44044717 44054884 +WFDC10B chr20 44313292 44333658 +DNTTIP1 chr20 44420576 44440066 +SNX21 chr20 44462449 44471914 +ACOT8 chr20 44470360 44486045 +ZSWIM1 chr20 44509866 44513905 +NEURL2 chr20 44517264 44519926 +CTSA chr20 44518783 44527459 +PLTP chr20 44527399 44540794 +PCIF1 chr20 44563267 44576662 +ZNF335 chr20 44577292 44600833 +NCOA5 chr20 44689624 44718591 +CD40 chr20 44746911 44758502 +SLC35C2 chr20 44978167 44993043 +ELMO2 chr20 44994688 45061704 +ZNF334 chr20 45129701 45142198 +TP53RK chr20 45313004 45318418 +EYA2 chr20 45523263 45817492 +ZMYND8 chr20 45837859 45985567 +NCOA3 chr20 46130601 46285621 +SULF2 chr20 46285092 46415360 +PREX1 chr20 47240790 47444420 +ARFGEF2 chr20 47538427 47653230 +CSE1L chr20 47662849 47713489 +STAU1 chr20 47729878 47804904 +DDX27 chr20 47835884 47860614 +ZNFX1 chr20 47854483 47894963 +ZFAS1 chr20 47894715 47905797 +SLC9A8 chr20 48429250 48508779 +SPATA2 
chr20 48519928 48532080 +RNF114 chr20 48552948 48570429 +UBE2V1 chr20 48697661 48732496 +TMEM189 chr20 48697663 48770335 +PTPN1 chr20 49126891 49201299 +PARD6B chr20 49348081 49373332 +ADNP chr20 49505585 49547958 +DPM1 chr20 49551404 49575092 +MOCS3 chr20 49575363 49577820 +ATP9A chr20 50213053 50385173 +ZFP64 chr20 50668202 50820847 +BCAS1 chr20 52553316 52687304 +PFDN4 chr20 52824386 52844591 +DOK5 chr20 53092136 53267710 +MC3R chr20 54823788 54824871 +CSTF1 chr20 54967427 54979518 +RTFDC1 chr20 55043647 55093943 +BMP7 chr20 55743804 55841685 +RAE1 chr20 55926066 55954267 +MTRNR2L3 chr20 55933496 55934878 +MIR4532 chr20 56470450 56470500 +RAB22A chr20 56884752 56942563 +VAPB chr20 56964178 57026157 +STX16 chr20 57226328 57254582 +NPEPL1 chr20 57264187 57294294 +MIR296 chr20 57392187 57392780 +GNAS chr20 57414773 57486247 +NELFCD chr20 57556263 57570188 +ATP5E chr20 57600522 57607437 +SLMO2 chr20 57608200 57617964 +EDN3 chr20 57875482 57901047 +PHACTR3 chr20 58152564 58422766 +FAM217B chr20 58508819 58523735 +MIR1257 chr20 60528602 60528718 +PSMA7 chr20 60711791 60718496 +OSBPL2 chr20 60813580 60871268 +ADRM1 chr20 60877149 60883918 +LAMA5 chr20 60883011 60942368 +RPS21 chr20 60962172 60963576 +CABLES2 chr20 60963688 60982341 +MRGBP chr20 61427805 61431945 +OGFR chr20 61436187 61445352 +DIDO1 chr20 61509090 61569304 +GID8 chr20 61569471 61576996 +HAR1B chr20 61726845 61733671 +YTHDF1 chr20 61826781 61847586 +NKAIN4 chr20 61872136 61904046 +ARFGAP1 chr20 61904137 61921142 +KCNQ2 chr20 62037542 62103993 +STMN3 chr20 62271061 62284780 +RTEL1-TNFRSF6B chr20 62290653 62330037 +TNFRSF6B chr20 62328021 62330037 +ARFRP1 chr20 62329996 62339377 +ZGPAT chr20 62338817 62367494 +LIME1 chr20 62366815 62370456 +TPD52L2 chr20 62496596 62522898 +DNAJC5 chr20 62526518 62567384 +MIR941-2 chr20 62550834 62550950 +UCKL1 chr20 62571186 62587769 +PRPF6 chr20 62612488 62664453 +SOX18 chr20 62679076 62680994 +TCEA2 chr20 62681189 62703700 +RGS19 chr20 62704534 62711323 +MYT1 chr20 
62783144 62873604 +PCMTD2 chr20 62887094 62926855 +MIR3687 chr21 9826203 9826263 +TEKT4P2 chr21 9907190 9968585 +ABCC13 chr21 15608527 15735075 +HSPA13 chr21 15743436 15755805 +SAMSN1 chr21 15857549 15955723 +LINC00478 chr21 17442842 17999716 +C21orf37 chr21 18811208 18821503 +CXADR chr21 18884700 18965897 +BTG3 chr21 18965971 18985265 +LINC00320 chr21 22114908 22175534 +NCAM2 chr21 22370633 22915650 +MRPL39 chr21 26957968 26979829 +JAM2 chr21 27011584 27089874 +ATP5J chr21 27088815 27107984 +GABPA chr21 27106881 27144771 +APP chr21 27252861 27543446 +ADAMTS1 chr21 28208066 28217728 +N6AMT1 chr21 30244513 30257693 +LTN1 chr21 30300466 30365270 +RWDD2B chr21 30376705 30391699 +USP16 chr21 30396950 30426809 +CCT8 chr21 30428126 30446118 +BACH1 chr21 30566392 31003071 +CLDN8 chr21 31586324 31588391 +KRTAP13-4 chr21 31802572 31803216 +KRTAP6-3 chr21 31964759 31965394 +KRTAP21-1 chr21 32127439 32127746 +TIAM1 chr21 32490734 32932290 +SOD1 chr21 33031935 33041244 +SCAF4 chr21 33043346 33104388 +MIS18A chr21 33640530 33651380 +URB1 chr21 33683329 33765335 +C21orf119 chr21 33765439 33766254 +C21orf59 chr21 33964389 33985176 +SYNJ1 chr21 34001069 34100359 +PAXBP1-AS1 chr21 34100426 34115433 +PAXBP1 chr21 34106210 34144169 +C21orf62 chr21 34162985 34186053 +OLIG2 chr21 34398153 34401504 +OLIG1 chr21 34442450 34444726 +IFNAR2 chr21 34602206 34637980 +IL10RB chr21 34638663 34669539 +IFNAR1 chr21 34696734 34732168 +IFNGR2 chr21 34775202 34851655 +TMEM50B chr21 34804792 34852318 +GART chr21 34876238 34915797 +SON chr21 34914924 34949812 +CRYZL1 chr21 34961647 35016232 +ITSN1 chr21 35014706 35272165 +ATP5O chr21 35275757 35288284 +MRPS6 chr21 35445524 35515334 +RCAN1 chr21 35885440 35987441 +SETD4 chr21 37406839 37451687 +CBR1 chr21 37442239 37445464 +CBR3-AS1 chr21 37504065 37528615 +CBR3 chr21 37507210 37518864 +MORC3 chr21 37692487 37758446 +PIGP chr21 38431470 38445470 +TTC3 chr21 38445526 38575413 +DSCR3 chr21 38595721 38640262 +DYRK1A chr21 38738092 38889753 +ERG chr21 
39751949 40033704 +ETS2 chr21 40177231 40196879 +PSMG1 chr21 40546695 40555777 +BRWD1 chr21 40556102 40693485 +HMGN1 chr21 40714241 40721573 +WRB chr21 40752170 40800454 +SH3BGR chr21 40817781 40887433 +DSCAM chr21 41382926 42219065 +MIR3197 chr21 42539484 42539556 +RIPK4 chr21 43159529 43187266 +ABCG1 chr21 43619799 43717354 +TFF1 chr21 43782391 43786703 +SLC37A1 chr21 43916118 44001550 +WDR4 chr21 44263204 44299678 +NDUFV3 chr21 44299754 44333414 +PKNOX1 chr21 44394620 44453691 +CBS chr21 44473301 44497053 +U2AF1 chr21 44513066 44527697 +RRP1B chr21 45079429 45115958 +PDXK chr21 45138975 45182188 +CSTB chr21 45192393 45196326 +RRP1 chr21 45209394 45225174 +PWP2 chr21 45527171 45551063 +C21orf33 chr21 45553487 45565605 +PFKL chr21 45719934 45747259 +C21orf2 chr21 45748827 45759285 +TRPM2 chr21 45770046 45862964 +KRTAP10-6 chr21 46011149 46012386 +KRTAP10-12 chr21 46117087 46117959 +UBE2G2 chr21 46188955 46221934 +SUMO3 chr21 46225532 46238694 +PTTG1IP chr21 46269500 46293752 +ITGB2 chr21 46305868 46351904 +FAM207A chr21 46359925 46396904 +SSR4P1 chr21 46491648 46494080 +SLC19A1 chr21 46913486 46964325 +PCBP3 chr21 47063608 47362368 +FTCD chr21 47556176 47575481 +SPATC1L chr21 47581062 47604390 +LSS chr21 47608055 47648738 +MCM3AP-AS1 chr21 47649131 47679304 +MCM3AP chr21 47655047 47706211 +YBEY chr21 47706251 47717665 +PCNT chr21 47744036 47865682 +DIP2A chr21 47878812 47989926 +S100B chr21 48018875 48025121 +PRMT2 chr21 48055079 48085036 +CECR7 chr22 17517460 17541715 +CECR5 chr22 17618401 17646177 +CECR1 chr22 17660194 17702879 +SLC25A18 chr22 18043139 18073760 +ATP6V1E1 chr22 18074902 18111584 +BCL2L13 chr22 18111621 18213388 +BID chr22 18216906 18257536 +MICAL3 chr22 18270415 18507325 +PEX26 chr22 18560689 18613905 +DGCR6 chr22 18893541 18901751 +PRODH chr22 18900294 18924066 +DGCR2 chr22 19023795 19109967 +DGCR14 chr22 19117792 19132197 +SLC25A1 chr22 19163095 19166343 +CLTCL1 chr22 19166986 19279239 +C22orf39 chr22 19338891 19435755 +MRPL40 chr22 19419425 
19423598 +UFD1L chr22 19437433 19466738 +TBX1 chr22 19744226 19771116 +GNB1L chr22 19770747 19842462 +C22orf29 chr22 19833661 19842419 +TXNRD2 chr22 19863040 19929341 +COMT chr22 19929130 19957498 +TANGO2 chr22 20004537 20053449 +DGCR8 chr22 20067755 20099400 +MIR3618 chr22 20073269 20073356 +TRMT2A chr22 20099389 20104915 +RANBP1 chr22 20103461 20114878 +DGCR6L chr22 20301799 20307603 +PI4KAP1 chr22 20383524 20427835 +ZNF74 chr22 20748405 20762745 +KLHL22 chr22 20783528 20850170 +MED15 chr22 20850200 20941919 +PI4KA chr22 21061979 21213705 +SNAP29 chr22 21213271 21245506 +CRKL chr22 21271714 21308037 +AIFM3 chr22 21319396 21335649 +LZTR1 chr22 21333751 21353327 +THAP7 chr22 21353393 21356485 +THAP7-AS1 chr22 21356175 21364631 +RIMBP3B chr22 21737663 21743446 +PI4KAP2 chr22 21827289 21871822 +UBE2L3 chr22 21903736 21978323 +YDJC chr22 21982378 21984353 +SDF2L1 chr22 21996550 21998587 +PPIL2 chr22 22006559 22054304 +MIR301B chr22 22007270 22007347 +YPEL1 chr22 22051833 22090123 +MAPK1 chr22 22108789 22221970 +PPM1F chr22 22273793 22307209 +TOP3B chr22 22311397 22337213 +ZNF280B chr22 22838767 22863505 +POM121L1P chr22 22981648 22986944 +GNAZ chr22 23412540 23467224 +BCR chr22 23521891 23660224 +ZDHHC8P1 chr22 23732793 23744913 +GUSBP11 chr22 23980674 24059543 +SMARCB1 chr22 24129150 24176703 +SLC2A11 chr22 24198890 24228496 +MIF chr22 24236191 24237414 +GSTT2B chr22 24299601 24303373 +DDT chr22 24313554 24322660 +GSTT2 chr22 24322339 24326106 +GSTT1 chr22 24376133 24384680 +GSTTP2 chr22 24385938 24401899 +CABIN1 chr22 24407642 24574596 +SPECC1L chr22 24666786 24813708 +GUCD1 chr22 24936406 24951903 +SNRPD3 chr22 24951471 25005947 +FAM211B chr22 24981588 24989175 +SGSM1 chr22 25202236 25323545 +CRYBB2 chr22 25615489 25627836 +CRYBB2P1 chr22 25844072 25916821 +SEZ6L chr22 26565440 26779562 +HPS4 chr22 26839389 26879803 +SRRD chr22 26879843 26890624 +TFIP11 chr22 26887191 26908471 +MIAT chr22 27042392 27072438 +PITPNB chr22 28247657 28316122 +TTC28-AS1 chr22 28315364 
28404569 +TTC28 chr22 28374004 29075853 +HSCB chr22 29138019 29153503 +CCDC117 chr22 29168662 29185283 +XBP1 chr22 29190543 29196585 +ZNRF3-AS1 chr22 29420987 29427464 +RHBDD3 chr22 29655841 29664198 +EWSR1 chr22 29663998 29696515 +RASL10A chr22 29708922 29715668 +AP1B1 chr22 29723669 29819168 +THOC5 chr22 29901868 29951205 +NIPSNAP1 chr22 29950797 29977326 +NF2 chr22 29999545 30094587 +ZMAT5 chr22 30126945 30163000 +UQCR10 chr22 30163358 30166402 +ASCC2 chr22 30184597 30234271 +MTMR3 chr22 30279144 30426855 +OSM chr22 30658818 30662829 +GATSL3 chr22 30681106 30685616 +TBC1D10A chr22 30687979 30723035 +SF3A1 chr22 30727977 30752936 +SEC14L2 chr22 30792846 30821305 +MTFP1 chr22 30821518 30825045 +GAL3ST1 chr22 30950622 30970574 +PES1 chr22 30972612 31003070 +DUSP18 chr22 31048038 31063877 +OSBP2 chr22 31089769 31303811 +MORC2-AS1 chr22 31318295 31328436 +TUG1 chr22 31366663 31375381 +SMTN chr22 31460091 31500743 +SELM chr22 31500758 31516055 +RNF185 chr22 31556168 31603005 +PIK3IP1 chr22 31677579 31688520 +PATZ1 chr22 31721790 31742218 +DRG1 chr22 31795509 31924726 +EIF4ENIF1 chr22 31832963 31892094 +PISD chr22 32014477 32058418 +PRR14L chr22 32072242 32146126 +DEPDC5 chr22 32149944 32303012 +YWHAH chr22 32340447 32353590 +RFPL2 chr22 32586422 32600718 +FBXO7 chr22 32870663 32894818 +SYN3 chr22 32908539 33454358 +TIMP3 chr22 33197687 33259030 +LARGE chr22 33558212 34318829 +ISX chr22 35462129 35483380 +HMGXB4 chr22 35653445 35691800 +TOM1 chr22 35695268 35743985 +HMOX1 chr22 35776354 35790207 +MCM5 chr22 35796056 35821423 +RBFOX2 chr22 36134783 36424473 +APOL4 chr22 36585172 36600886 +APOL2 chr22 36622256 36636000 +MYH9 chr22 36677327 36784063 +TXN2 chr22 36863083 36878077 +FOXRED2 chr22 36883237 36903148 +EIF3D chr22 36906897 36925483 +IFT27 chr22 37154246 37172300 +NCF4 chr22 37257030 37274057 +TST chr22 37406900 37415681 +MPST chr22 37415676 37425863 +C1QTNF6 chr22 37576207 37595425 +CYTH4 chr22 37678068 37711382 +MFNG chr22 37865101 37882439 +CDC42EP1 chr22 
37956454 37965412 +GGA1 chr22 38004481 38029571 +SH3BP1 chr22 38030661 38062939 +PDXP chr22 38054734 38062941 +LGALS1 chr22 38071615 38075813 +NOL12 chr22 38077680 38170137 +TRIOBP chr22 38093011 38172563 +H1F0 chr22 38201114 38203442 +GCAT chr22 38203912 38213183 +ANKRD54 chr22 38226862 38245334 +MIR659 chr22 38243685 38243781 +EIF3L chr22 38244875 38285414 +POLR2F chr22 38348614 38437922 +PLA2G6 chr22 38507502 38601697 +MAFF chr22 38597889 38612518 +TMEM184B chr22 38615298 38669040 +CSNK1E chr22 38686697 38794527 +DDX17 chr22 38879445 38903665 +CBY1 chr22 39052641 39069859 +TOMM22 chr22 39077953 39080818 +JOSD1 chr22 39081548 39097561 +GTPBP1 chr22 39101728 39134304 +SUN2 chr22 39130730 39190148 +DNAL4 chr22 39174513 39190203 +NPTXR chr22 39214457 39239987 +APOBEC3D chr22 39410368 39429281 +PDGFB chr22 39619364 39640756 +RPL3 chr22 39708887 39716394 +SYNGR1 chr22 39745930 39781593 +TAB1 chr22 39795746 39833065 +ATF4 chr22 39915700 39918691 +RPS19BP1 chr22 39925098 39928860 +TNRC6B chr22 40440821 40731812 +ADSL chr22 40742507 40786467 +SGSM3 chr22 40766595 40806293 +MKL1 chr22 40806285 41032706 +SLC25A17 chr22 41165634 41215403 +ST13 chr22 41220539 41253026 +XPNPEP3 chr22 41253081 41363838 +RBX1 chr22 41347351 41369313 +EP300 chr22 41487790 41576081 +L3MBTL2 chr22 41601209 41627275 +CHADL chr22 41625517 41636938 +RANGAP1 chr22 41641615 41682255 +ZC3H7B chr22 41697526 41756151 +TEF chr22 41763337 41795330 +TOB2 chr22 41829496 41843027 +PHF5A chr22 41855721 41864729 +ACO2 chr22 41865129 41924993 +POLR3H chr22 41921808 41940610 +PMM1 chr22 41972898 41985894 +DESI1 chr22 41994032 42017100 +XRCC6 chr22 42017123 42060044 +NHP2L1 chr22 42069934 42086508 +SREBF2 chr22 42229109 42303312 +TNFRSF13C chr22 42321045 42322822 +NAGA chr22 42454358 42466846 +NDUFA6 chr22 42481529 42486959 +TCF20 chr22 42556019 42739622 +RRP7A chr22 42905974 42915808 +RRP7B chr22 42951229 42978044 +POLDIP3 chr22 42979727 43010968 +CYB5R3 chr22 43013846 43045574 +A4GALT chr22 43088127 43117304 
+ARFGAP3 chr22 43192508 43254112 +PACSIN2 chr22 43231418 43411151 +TTLL1 chr22 43435522 43485434 +MCAT chr22 43528212 43539400 +TSPO chr22 43547520 43559248 +TTLL12 chr22 43562628 43583139 +EFCAB6-AS1 chr22 43911997 43932747 +PNPLA3 chr22 44319619 44360368 +SAMM50 chr22 44351301 44406411 +PARVG chr22 44568836 44615413 +LDOC1L chr22 44888452 44894178 +LINC00207 chr22 44965219 44968221 +PHF21B chr22 45277042 45405880 +NUP50 chr22 45559722 45583896 +FAM118A chr22 45704849 45737836 +ATXN10 chr22 46067678 46241187 +MIR4763 chr22 46509446 46509537 +CDPF1 chr22 46639908 46646576 +TRMU chr22 46726772 46753237 +CERK chr22 47080308 47134158 +TBC1D22A chr22 47158518 47571336 +FAM19A5 chr22 48885272 49246724 +ALG12 chr22 50293877 50312106 +CRELD2 chr22 50311815 50321188 +PIM3 chr22 50354161 50357728 +MLC1 chr22 50497820 50524331 +TRABD chr22 50624344 50638027 +TUBGCP6 chr22 50656118 50683421 +MAPK12 chr22 50683879 50700254 +MAPK11 chr22 50702142 50709196 +PLXNB2 chr22 50713408 50746056 +DENND6B chr22 50747459 50765489 +PPP6R2 chr22 50781733 50883514 +LMF2 chr22 50941376 50946135 +NCAPH2 chr22 50946645 50961901 +SCO2 chr22 50961997 50964868 +TYMP chr22 50964181 50968485 +CPT1B chr22 51007290 51017899 +CHKB chr22 51017378 51039884 +ACR chr22 51176624 51183762 +RABL2B chr22 51205929 51222091 +PLCXD1 chrX 192989 220023 +GTPBP6 chrX 220025 230886 +LINC00685 chrX 281725 282586 +CSF2RA chrX 1387693 1429274 +IL3RA chrX 1455509 1501578 +SLC25A6 chrX 1505045 1511617 +ASMTL chrX 1522032 1572655 +DHRSX chrX 2137557 2420846 +ZBED1 chrX 2404455 2419008 +LINC00102 chrX 2531029 2533388 +CD99 chrX 2609220 2659350 +CXorf28 chrX 3189861 3202694 +PRKX chrX 3522411 3631649 +NLGN4X chrX 5758678 6146904 +HDHD1 chrX 6966961 7066231 +VCX2 chrX 8137989 8139308 +SHROOM2 chrX 9754496 9917483 +CLCN4 chrX 10125024 10205700 +MID1 chrX 10413350 10851773 +HCCS chrX 11129421 11141198 +MSL3 chrX 11776278 11793870 +TLR8 chrX 12924739 12941288 +TMSB4X chrX 12993227 12995346 +RAB9A chrX 13707244 13728625 +TRAPPC2 
chrX 13730363 13752754 +OFD1 chrX 13752832 13787480 +GPM6B chrX 13789150 13956757 +GEMIN8 chrX 14026398 14048012 +MOSPD2 chrX 14891563 14940449 +ACE2 chrX 15579156 15620271 +CA5BP1 chrX 15693055 15721847 +ZRSR2 chrX 15808595 15841383 +AP1S2 chrX 15843929 15873054 +CTPS2 chrX 16606126 16731059 +SYAP1 chrX 16737755 16783459 +TXLNG chrX 16804550 16862642 +RBBP7 chrX 16857406 16888537 +REPS2 chrX 16964814 17171395 +SCML1 chrX 17755588 17773105 +SCML2 chrX 18257434 18372847 +RS1 chrX 18658030 18690229 +PDHA1 chrX 19362011 19379823 +SH3KBP1 chrX 19552093 19905719 +MIR23C chrX 20035206 20035305 +EIF1AX chrX 20142636 20159962 +MBTPS2 chrX 21857754 21903542 +SMS chrX 21958691 22025798 +PHEX-AS1 chrX 22180850 22191100 +PRDX4 chrX 23682379 23704516 +ACOT9 chrX 23720370 23784592 +SAT1 chrX 23801290 23804343 +APOO chrX 23851470 23926057 +EIF2S3 chrX 24072833 24096088 +ZFX chrX 24167290 24234372 +PDK3 chrX 24483338 24557954 +PCYT1B chrX 24576204 24690794 +POLA1 chrX 24712036 25015103 +MAGEB18 chrX 26156460 26158852 +IL1RAPL1 chrX 28605516 29974840 +MAGEB3 chrX 30248553 30255607 +CXorf21 chrX 30576941 30595961 +GK chrX 30671476 30748725 +FAM47A chrX 34147869 34150447 +PRRG1 chrX 37208528 37316548 +LANCL3 chrX 37430822 37543716 +CYBB chrX 37639264 37672714 +TSPAN7 chrX 38420623 38548169 +ATP6AP2 chrX 40440146 40465889 +CXorf38 chrX 40488285 40506819 +MED14 chrX 40507558 40595110 +USP9X chrX 40944888 41095832 +DDX3X chrX 41192651 41223725 +CASK chrX 41374187 41782716 +GPR34 chrX 41548226 41556526 +GPR82 chrX 41583408 41589388 +NDP chrX 43808022 43832750 +FUNDC1 chrX 44382885 44402247 +KDM6A chrX 44732757 44971847 +KRBOX4 chrX 46306292 46356857 +ZNF674-AS1 chrX 46404928 46407843 +SLC9A7 chrX 46464753 46618490 +RP2 chrX 46696375 46741793 +CXorf31 chrX 46746852 46759138 +PHF16 chrX 46771711 46920641 +NDUFB11 chrX 47001615 47004903 +RBM10 chrX 47004268 47046212 +UBA1 chrX 47050260 47074527 +INE1 chrX 47064320 47065264 +CDK16 chrX 47077259 47089396 +USP11 chrX 47092089 47107727 
+SNORA11C chrX 47248048 47248176 +ZNF41 chrX 47305278 47342345 +ARAF chrX 47420516 47431307 +ELK1 chrX 47494920 47510003 +UXT chrX 47511197 47518560 +ZNF81 chrX 47696301 47861960 +ZNF182 chrX 47834250 47863377 +ZNF630 chrX 47842756 47931025 +SSX4 chrX 48242863 48252785 +FTSJ1 chrX 48334541 48344752 +PORCN chrX 48367350 48379202 +EBP chrX 48379546 48387104 +TBC1D25 chrX 48397845 48420997 +RBM3 chrX 48432837 48437454 +WDR13 chrX 48448430 48463581 +SUV39H1 chrX 48553945 48567403 +HDAC6 chrX 48659784 48683392 +PCSK1N chrX 48689504 48694035 +TIMM17B chrX 48750730 48755426 +PQBP1 chrX 48755195 48760420 +SLC35A2 chrX 48760459 48769235 +OTUD5 chrX 48779305 48815648 +GRIPAP1 chrX 48830134 48858675 +PRAF2 chrX 48928813 48931730 +WDR45 chrX 48929385 48958108 +GPKOW chrX 48970334 48980151 +PRICKLE3 chrX 49031151 49042845 +SYP chrX 49044269 49056718 +CCDC22 chrX 49091927 49106987 +GAGE2D chrX 49207160 49214420 +GAGE12H chrX 49344586 49351904 +MIR362 chrX 49773572 49773636 +SHROOM4 chrX 50334647 50557302 +NUDT10 chrX 51075083 51080377 +NUDT11 chrX 51232863 51239448 +GSPT2 chrX 51486481 51489324 +MAGED1 chrX 51546103 51645453 +MAGED4B chrX 51804923 51812368 +XAGE2B chrX 52112173 52118823 +SSX2B chrX 52780318 52790617 +FAM156A chrX 52976462 53024651 +TSPYL2 chrX 53111549 53117722 +KDM5C chrX 53220503 53254604 +SMC1A chrX 53401070 53449677 +HSD17B10 chrX 53458206 53461320 +HUWE1 chrX 53559057 53713673 +PHF8 chrX 53963109 54075391 +FAM120C chrX 54094757 54209714 +WNK3 chrX 54219256 54385075 +TSR2 chrX 54466834 54471920 +FGD1 chrX 54471887 54522599 +GNL3L chrX 54556644 54587504 +MAGED2 chrX 54834032 54842445 +TRO chrX 54946895 54957864 +APEX2 chrX 55026790 55035490 +FAM104B chrX 55169535 55187743 +MTRNR2L10 chrX 55207824 55208944 +MAGEH1 chrX 55478538 55479998 +RRAGB chrX 55744172 55785207 +UBQLN2 chrX 56590026 56593443 +UQCRBP1 chrX 56763675 56764010 +SPIN3 chrX 57002803 57021970 +SPIN2B chrX 57144984 57147980 +ARHGEF9 chrX 62854847 63005426 +AMER1 chrX 63404997 63425624 +ZC4H2 chrX 
64136250 64254593 +LAS1L chrX 64732462 64754655 +MSN chrX 64808257 64961791 +VSIG4 chrX 65241580 65259967 +HEPH chrX 65382391 65488709 +EDA2R chrX 65815479 65859108 +OPHN1 chrX 67262186 67653755 +YIPF6 chrX 67718165 67757127 +PJA1 chrX 68380694 68385636 +MIR676 chrX 69242707 69242773 +IGBP1 chrX 69353299 69386174 +PDZD11 chrX 69506445 69510364 +KIF4A chrX 69509879 69640682 +SNX12 chrX 70279094 70288273 +MED12 chrX 70338406 70362303 +NLGN3 chrX 70364681 70391051 +GJB1 chrX 70435044 70445366 +ZMYM3 chrX 70459474 70474996 +NONO chrX 70503042 70521018 +TAF1 chrX 70586114 70752224 +INGX chrX 70711529 70712778 +OGT chrX 70752933 70795747 +CXorf49 chrX 70934221 70938135 +PIN4 chrX 71401203 71522776 +RPS4X chrX 71475529 71497150 +HDAC8 chrX 71549366 71792953 +PHKA1 chrX 71798664 71934167 +MAP2K4P1 chrX 72744111 72782921 +CHIC1 chrX 72783036 72906937 +XIST chrX 73040486 73072588 +JPX chrX 73164159 73290243 +FTX chrX 73183790 73513409 +ZCCHC13 chrX 73524025 73524868 +SLC16A2 chrX 73641085 73753752 +RLIM chrX 73805052 73834452 +ABCB7 chrX 74273115 74376567 +UPRT chrX 74493920 74524435 +ZDHHC15 chrX 74588262 74743337 +TTC3P1 chrX 74960541 74966749 +PBDC1 chrX 75392771 75398039 +MAGEE1 chrX 75648046 75651744 +ATRX chrX 76760356 77041702 +MAGT1 chrX 77081861 77151090 +COX7B chrX 77154935 77162870 +PGK1 chrX 77320685 77384793 +TAF9B chrX 77385245 77395203 +ZCCHC5 chrX 77911566 77914825 +HMGN5 chrX 80369200 80457441 +SH3BGRL chrX 80457442 80554046 +POU3F4 chrX 82763269 82764775 +APOOL chrX 84258832 84343069 +ZNF711 chrX 84498997 84528368 +CHM chrX 85116185 85302566 +DACH2 chrX 85403462 86087607 +PABPC5 chrX 90689594 90693583 +PCDH11X chrX 91034260 91878229 +NAP1L3 chrX 92925929 92928567 +DIAPH2 chrX 95939662 96859996 +TSPAN6 chrX 99883667 99894988 +CSTF2 chrX 100075384 100095921 +ARL13A chrX 100224697 100245818 +TRMT2B chrX 100264335 100307105 +TIMM8A chrX 100600649 100604184 +RPL36A chrX 100645812 100651105 +RPL36A-HNRNPH2 chrX 100645999 100667285 +GLA chrX 100652791 100662913 
+HNRNPH2 chrX 100663283 100669121 +ARMCX4 chrX 100673275 100788446 +ARMCX1 chrX 100805514 100809683 +ARMCX6 chrX 100870110 100872991 +ARMCX3 chrX 100877787 100882833 +ARMCX2 chrX 100910267 100914876 +ZMAT1 chrX 101137262 101187004 +TCEAL2 chrX 101380660 101382683 +TCEAL6 chrX 101395448 101397942 +BEX5 chrX 101408680 101411029 +ARMCX5 chrX 101854096 101859087 +BHLHB9 chrX 101975616 102008468 +LINC00630 chrX 102024089 102140334 +BEX1 chrX 102317579 102319168 +BEX4 chrX 102470020 102472174 +TCEAL8 chrX 102507923 102510131 +BEX2 chrX 102564274 102565974 +TCEAL7 chrX 102585124 102587254 +WBP5 chrX 102611373 102613397 +NGFRAP1 chrX 102631268 102633005 +TCEAL4 chrX 102831159 102842657 +TCEAL3 chrX 102862379 102884618 +TCEAL1 chrX 102883632 102885881 +MORF4L2 chrX 102930424 102943086 +PLP1 chrX 103028647 103047548 +RAB9B chrX 103077252 103087158 +FAM199X chrX 103411301 103440583 +TEX13A chrX 104463611 104465358 +CXorf57 chrX 105855160 105922672 +CLDN2 chrX 106143394 106174091 +RBM41 chrX 106307650 106362057 +PRPS1 chrX 106871737 106894256 +TSC22D3 chrX 106956451 107020572 +PSMD10 chrX 107327437 107334848 +ATG4A chrX 107334898 107397901 +NXT2 chrX 108779010 108787919 +ACSL4 chrX 108867473 108976632 +AMMECR1 chrX 109437414 109683461 +DCX chrX 110537007 110655603 +ALG13 chrX 110909043 111003877 +TRPC5OS chrX 111125125 111147218 +MIR1298 chrX 113949650 113949761 +PLS3 chrX 114795501 114885181 +CXorf61 chrX 115592849 115594164 +KLHL13 chrX 117031776 117251303 +WDR44 chrX 117480036 117583924 +IL13RA1 chrX 117861535 117928502 +PGRMC1 chrX 118370216 118378429 +SLC25A43 chrX 118533023 118588441 +SLC25A5 chrX 118602363 118605282 +CXorf56 chrX 118672112 118699397 +UBE2A chrX 118708501 118718381 +NKRF chrX 118722300 118739858 +RPL39 chrX 118920467 118925606 +UPF3B chrX 118967985 118986961 +RNF113A chrX 119004497 119005791 +NDUFA1 chrX 119005450 119010625 +NKAP chrX 119059014 119077735 +ZBTB33 chrX 119384607 119392253 +TMEM255A chrX 119392505 119445411 +LAMP2 chrX 119561682 119603220 
+CUL4B chrX 119658464 119709649 +MCTS1 chrX 119727865 119754929 +C1GALT1C1 chrX 119759648 119764005 +CT47A9 chrX 120077421 120080733 +GRIA3 chrX 122318006 122624766 +THOC2 chrX 122734412 122866906 +XIAP chrX 122993574 123047829 +STAG2 chrX 123094062 123556514 +SMARCA1 chrX 128580480 128657477 +OCRL chrX 128673826 128726538 +SASH3 chrX 128913955 128929177 +ZDHHC9 chrX 128937264 128977885 +UTP14A chrX 129040097 129063737 +AIFM1 chrX 129263337 129299861 +RAB33A chrX 129305623 129318844 +ZNF280C chrX 129336685 129402873 +SLC25A14 chrX 129473874 129507335 +RBMX2 chrX 129535943 129547317 +FAM45B chrX 129628939 129630562 +MST4 chrX 131157293 131209971 +RAP2C chrX 131337053 131353471 +MIR363 chrX 133303408 133303482 +PHF6 chrX 133507283 133562820 +HPRT1 chrX 133594183 133654543 +MIR450A1 chrX 133674371 133674461 +FAM122C chrX 133930819 133988640 +MOSPD1 chrX 134021656 134049297 +FAM127C chrX 134154543 134156559 +FAM127A chrX 134166333 134167576 +FAM127B chrX 134184962 134186226 +ZNF75D chrX 134382867 134478012 +ZNF449 chrX 134478721 134497077 +DDX26B chrX 134654584 134716435 +CT45A1 chrX 134847185 134857354 +MMGT1 chrX 135044229 135056222 +SLC9A6 chrX 135067598 135129423 +FHL1 chrX 135229559 135293518 +MAP7D3 chrX 135295381 135338641 +HTATSF1 chrX 135579238 135594505 +ARHGEF6 chrX 135747706 135864247 +RBMX chrX 135930163 135962923 +ZIC3 chrX 136648301 136659850 +SOX3 chrX 139585152 139587225 +CDR1 chrX 139865425 139866723 +LDOC1 chrX 140269934 140271310 +SPANXD chrX 140785568 140786896 +SLITRK2 chrX 144899350 144907360 +MIR506 chrX 146312238 146312361 +FMR1 chrX 146993469 147032645 +FMR1NB chrX 147062849 147108187 +IDS chrX 148558521 148615470 +CXorf40A chrX 148621900 148632055 +TMEM185A chrX 148678216 148713568 +MAGEA8 chrX 149009941 149014609 +CXorf40B chrX 149097745 149107029 +MTM1 chrX 149737069 149841795 +MTMR1 chrX 149861435 149933576 +CD99L2 chrX 149934810 150067289 +HMGB3 chrX 150148982 150159248 +VMA21 chrX 150564987 150577836 +GABRA3 chrX 151334706 151619830 
+CSAG1 chrX 151903228 151909518 +CETN2 chrX 151995517 151999321 +NSDHL chrX 151999511 152038273 +PNMA6A chrX 152240839 152340864 +ZNF275 chrX 152599613 152625568 +HAUS7 chrX 152713124 152760978 +FAM58A chrX 152853377 152865500 +DUSP9 chrX 152907946 152916781 +SLC6A8 chrX 152953554 152962048 +BCAP31 chrX 152965947 152990152 +PLXNB3 chrX 153029651 153044801 +IDH3G chrX 153051221 153059978 +SSR4 chrX 153058971 153063960 +L1CAM chrX 153126969 153174677 +ARHGAP4 chrX 153172821 153200452 +NAA10 chrX 153194695 153200676 +HCFC1 chrX 153213004 153237258 +TMEM187 chrX 153237778 153248646 +IRAK1 chrX 153275951 153285431 +MECP2 chrX 153287024 153363212 +FLNA chrX 153576892 153603006 +EMD chrX 153607557 153609883 +RPL10 chrX 153618315 153637504 +TAZ chrX 153639854 153650065 +ATP6AP1 chrX 153656978 153664862 +GDI1 chrX 153665266 153671814 +FAM50A chrX 153672473 153679002 +LAGE3 chrX 153706028 153707596 +UBL4A chrX 153712056 153715009 +FAM3A chrX 153733350 153744566 +G6PD chrX 153759606 153775787 +IKBKG chrX 153769414 153796782 +DKC1 chrX 153991031 154005964 +MPP1 chrX 154006959 154049282 +F8 chrX 154064063 154255215 +FUNDC2 chrX 154254255 154288578 +BRCC3 chrX 154299695 154351349 +VBP1 chrX 154425284 154468098 +RAB39B chrX 154487526 154493874 +CLIC2 chrX 154505500 154563966 +TMLHE chrX 154719776 154899605 +VAMP7 chrX 155110956 155173433 +RPS4Y1 chrY 2709527 2800041 +TTTY1B chrY 6258472 6279605 +TTTY18 chrY 8551411 8551919 +RBMY3AP chrY 9448180 9458885 +TTTY15 chrY 14774265 14804162 +USP9Y chrY 14813160 14972764 +DDX3Y chrY 15016019 15032390 +NLGN4Y chrY 16634518 16957530 +CDY2B chrY 19989290 19992100 +KDM5D chrY 21865751 21906825 +EIF1AY chrY 22737611 22755040 +PRY2 chrY 24217903 24242154 +BPY2 chrY 25119966 25151612 +BPY2C chrY 27177048 27208695 diff --git a/inst/extdata/oligodendroglioma_annotations_downsampled.txt b/inst/extdata/oligodendroglioma_annotations_downsampled.txt new file mode 100644 index 00000000..6047ac53 --- /dev/null +++ 
b/inst/extdata/oligodendroglioma_annotations_downsampled.txt @@ -0,0 +1,184 @@ +MGH54_P2_C12 Microglia/Macrophage +MGH36_P6_F03 Microglia/Macrophage +MGH53_P4_H08 Microglia/Macrophage +MGH53_P2_E09 Microglia/Macrophage +MGH36_P5_E12 Microglia/Macrophage +MGH54_P2_H07 Microglia/Macrophage +MGH36_P4_H06 Microglia/Macrophage +MGH53_P1_C01 Microglia/Macrophage +MGH36_P4_A10 Microglia/Macrophage +MGH36_P3_D10 Microglia/Macrophage +MGH54_P2_F09 Microglia/Macrophage +MGH36_P7_H06 Microglia/Macrophage +MGH54_P2_H03 Microglia/Macrophage +MGH36_P8_A02 Microglia/Macrophage +MGH53_P2_C08 Microglia/Macrophage +MGH53_P2_A07 Microglia/Macrophage +MGH53_P1_F10 Microglia/Macrophage +MGH36_P3_H06 Microglia/Macrophage +MGH54_P2_F03 Microglia/Macrophage +MGH54_P16_F12 Oligodendrocytes (non-malignant) +MGH54_P12_C10 Oligodendrocytes (non-malignant) +MGH54_P11_C11 Oligodendrocytes (non-malignant) +MGH54_P15_D06 Oligodendrocytes (non-malignant) +MGH54_P16_A03 Oligodendrocytes (non-malignant) +MGH53_P7_B09 Oligodendrocytes (non-malignant) +MGH54_P10_G04 Oligodendrocytes (non-malignant) +MGH53_P2_A02 Oligodendrocytes (non-malignant) +MGH53_P7_F07 Oligodendrocytes (non-malignant) +MGH53_P5_G02 Oligodendrocytes (non-malignant) +MGH53_P11_H03 Oligodendrocytes (non-malignant) +MGH53_P1_A10 Oligodendrocytes (non-malignant) +MGH53_P5_H09 Oligodendrocytes (non-malignant) +MGH53_P11_E03 Oligodendrocytes (non-malignant) +MGH53_P10_F11 Oligodendrocytes (non-malignant) +MGH53_P1_D07 Oligodendrocytes (non-malignant) +MGH53_P2_G04 Oligodendrocytes (non-malignant) +MGH53_P2_G09 Oligodendrocytes (non-malignant) +MGH53_P5_F04 Oligodendrocytes (non-malignant) +MGH53_P11_F08 Oligodendrocytes (non-malignant) +MGH53_P8_F03 Oligodendrocytes (non-malignant) +MGH53_P6_B11 Oligodendrocytes (non-malignant) +MGH53_P6_H06 Oligodendrocytes (non-malignant) +MGH36_P1_B02 malignant_MGH36 +MGH36_P1_H10 malignant_MGH36 +MGH36_P3_A09 malignant_MGH36 +MGH36_P3_B02 malignant_MGH36 +MGH36_P3_C04 malignant_MGH36 +MGH36_P3_E06 
malignant_MGH36 +MGH36_P4_B09 malignant_MGH36 +MGH36_P4_D11 malignant_MGH36 +MGH36_P4_G03 malignant_MGH36 +MGH36_P6_C04 malignant_MGH36 +MGH36_P6_G08 malignant_MGH36 +MGH36_P7_B04 malignant_MGH36 +MGH36_P7_D03 malignant_MGH36 +MGH36_P7_F04 malignant_MGH36 +MGH36_P7_G04 malignant_MGH36 +MGH36_P5_B08 malignant_MGH36 +MGH36_P5_F05 malignant_MGH36 +MGH36_P5_F11 malignant_MGH36 +MGH36_P5_H05 malignant_MGH36 +MGH36_P10_B08 malignant_MGH36 +MGH36_P10_C10 malignant_MGH36 +MGH36_P10_E07 malignant_MGH36 +MGH36_P10_F09 malignant_MGH36 +MGH36_P8_E05 malignant_MGH36 +MGH36_P8_H09 malignant_MGH36 +MGH36_P9_B01 malignant_MGH36 +MGH36_P9_B11 malignant_MGH36 +MGH36_P9_H03 malignant_MGH36 +MGH36_P2_A08 malignant_MGH36 +MGH36_P2_C02 malignant_MGH36 +MGH36_P2_G01 malignant_MGH36 +MGH36_P2_G02 malignant_MGH36 +MGH36_P2_H06 malignant_MGH36 +MGH53_P5_A08 malignant_MGH53 +MGH53_P5_D02 malignant_MGH53 +MGH53_P6_F03 malignant_MGH53 +MGH53_P6_H04 malignant_MGH53 +MGH53_P7_B10 malignant_MGH53 +MGH53_P7_C03 malignant_MGH53 +MGH53_P7_E02 malignant_MGH53 +MGH53_P7_G11 malignant_MGH53 +MGH53_P7_H03 malignant_MGH53 +MGH53_P8_A07 malignant_MGH53 +MGH53_P8_C11 malignant_MGH53 +MGH53_P8_E05 malignant_MGH53 +MGH53_P8_E10 malignant_MGH53 +MGH53_P8_H04 malignant_MGH53 +MGH53_P1_B04 malignant_MGH53 +MGH53_P12_A01 malignant_MGH53 +MGH53_P12_B09 malignant_MGH53 +MGH53_P12_C02 malignant_MGH53 +MGH53_P12_C09 malignant_MGH53 +MGH53_P12_D12 malignant_MGH53 +MGH53_P12_E03 malignant_MGH53 +MGH53_P10_B02 malignant_MGH53 +MGH53_P10_C09 malignant_MGH53 +MGH53_P10_E09 malignant_MGH53 +MGH53_P10_H08 malignant_MGH53 +MGH53_P11_A03 malignant_MGH53 +MGH53_P11_B02 malignant_MGH53 +MGH53_P11_B11 malignant_MGH53 +MGH53_P11_F12 malignant_MGH53 +MGH53_P11_H12 malignant_MGH53 +MGH53_P9_A09 malignant_MGH53 +MGH53_P9_C12 malignant_MGH53 +MGH53_P4_C03 malignant_MGH53 +MGH53_P4_F01 malignant_MGH53 +97_P3_G07 malignant_97 +97_P3_E04 malignant_97 +97_P3_D10 malignant_97 +97_P3_E01 malignant_97 +97_P3_E03 malignant_97 +97_P3_B10 
malignant_97 +97_P3_B04 malignant_97 +97_P3_B01 malignant_97 +97_P3_B03 malignant_97 +97_P3_D01 malignant_97 +97_P3_D04 malignant_97 +97_P3_D12 malignant_97 +97_P3_F12 malignant_97 +97_P3_E12 malignant_97 +97_P5_D09 malignant_97 +97_P6_H01 malignant_97 +97_P5_C10 malignant_97 +97_P6_E07 malignant_97 +97_P5_D02 malignant_97 +97_P6_G10 malignant_97 +97_P5_G05 malignant_97 +97_P6_B09 malignant_97 +97_P5_H08 malignant_97 +97_P5_F04 malignant_97 +97_P5_D01 malignant_97 +97_P6_F05 malignant_97 +97_P6_A06 malignant_97 +97_P5_A07 malignant_97 +97_P6_E01 malignant_97 +97_P6_D09 malignant_97 +97_P5_G06 malignant_97 +97_P5_E12 malignant_97 +97_P6_A07 malignant_97 +97_P6_G12 malignant_97 +97_P6_H06 malignant_97 +93_P3_B02 malignant_93 +93_P3_G05 malignant_93 +93_P3_H04 malignant_93 +93_P3_A10 malignant_93 +93_P3_C04 malignant_93 +93_P3_D07 malignant_93 +93_P3_G07 malignant_93 +93_P3_E09 malignant_93 +93_P3_G11 malignant_93 +93_P3_A11 malignant_93 +93_P6_H11 malignant_93 +93_P5_H06 malignant_93 +93_P5_C12 malignant_93 +93_P6_A02 malignant_93 +93_P5_D07 malignant_93 +93_P6_C07 malignant_93 +93_P9_C04 malignant_93 +93_P9_E04 malignant_93 +93_P9_H01 malignant_93 +93_P8_B06 malignant_93 +93_P10_E05 malignant_93 +93_P9_B10 malignant_93 +93_P8_G11 malignant_93 +93_P9_F02 malignant_93 +93_P10_F03 malignant_93 +93_P9_G11 malignant_93 +93_P8_E09 malignant_93 +93_P8_C11 malignant_93 +93_P9_A03 malignant_93 +93_P10_G11 malignant_93 +93_P9_B11 malignant_93 +93_P9_D06 malignant_93 +93_P8_B02 malignant_93 +93_P8_C09 malignant_93 +93_P9_H03 malignant_93 +93_P10_D04 malignant_93 +93_P8_G09 malignant_93 +93_P10_B10 malignant_93 +93_P9_C07 malignant_93 +93_P8_A12 malignant_93 diff --git a/inst/extdata/oligodendroglioma_expression_downsampled.counts.matrix.gz b/inst/extdata/oligodendroglioma_expression_downsampled.counts.matrix.gz new file mode 100644 index 00000000..65c5493e Binary files /dev/null and b/inst/extdata/oligodendroglioma_expression_downsampled.counts.matrix.gz differ diff --git 
a/inst/script/README.txt b/inst/script/README.txt new file mode 100644 index 00000000..778e808e --- /dev/null +++ b/inst/script/README.txt @@ -0,0 +1,5 @@ +This example uses an abridged version of the gencode annotations. You do not want to use that file with your own data. It's abridged here only to reduce space in R packaging. + +The complete gencode annotation file can be found here: +https://github.com/broadinstitute/inferCNV_examples/tree/master/__gene_position_data + diff --git a/man/CreateInfercnvObject.Rd b/man/CreateInfercnvObject.Rd index a3104030..cbff686a 100644 --- a/man/CreateInfercnvObject.Rd +++ b/man/CreateInfercnvObject.Rd @@ -5,7 +5,9 @@ \title{CreateInfercnvObject} \usage{ CreateInfercnvObject(raw_counts_matrix, gene_order_file, annotations_file, - ref_group_names, delim = "\\t") + ref_group_names, delim = "\\t", max_cells_per_group = NULL, + min_max_counts_per_cell = NULL, chr_exclude = c("chrX", "chrY", + "chrM")) } \arguments{ \item{raw_counts_matrix}{the matrix of genes (rows) vs. cells (columns) containing the raw counts @@ -19,6 +21,12 @@ otherwise, if matrix or Matrix, will use the data directly.} \item{ref_group_names}{a vector containing the classifications of the reference (normal) cells to use for infering cnv} \item{delim}{delimiter used in the input files} + +\item{max_cells_per_group}{maximum number of cells to use per group. Default=NULL, using all cells defined in the annotations_file. This option is useful for randomly subsetting the existing data for a quicker preview run, such as using 50 cells per group instead of hundreds.} + +\item{min_max_counts_per_cell}{minimum and maximum counts allowed per cell. Any cells outside this range will be removed from the counts matrix. default=NULL and uses all cells. If used, should be set as c(min_counts, max_counts)} + +\item{chr_exclude}{list of chromosomes in the reference genome annotations that should be excluded from analysis. 
Default = c('chrX', 'chrY', 'chrM')} } \value{ infercnv diff --git a/man/Create_NGCHM.Rd b/man/Create_NGCHM.Rd deleted file mode 100644 index dcef5118..00000000 --- a/man/Create_NGCHM.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/NextGenHeatMap.R -\name{Create_NGCHM} -\alias{Create_NGCHM} -\title{Create Next Generation Clustered Heat Map (NG-CHM)} -\usage{ -Create_NGCHM(infercnv_obj, path_to_shaidyMapGen, out_dir, title = NULL, - gene_symbol = NULL, x.center = NA, x.range = NA) -} -\arguments{ -\item{infercnv_obj}{(S4) InferCNV S4 object holding expression data, gene location data, annotation information.} - -\item{path_to_shaidyMapGen}{(string) Path to the java application ShaidyMapGen.jar} - -\item{out_dir}{(string) Path to where the infercnv.ngchm output file should be saved to} - -\item{title}{(string) Title that will be used for the heatmap} - -\item{gene_symbol}{(string) Specify the label type that is given to the gene needed to create linkouts, default is NULL} - -\item{x.center}{(integer) Center expression value for heatmap coloring.} - -\item{x.range}{(integer) Values for minimum and maximum thresholds for heatmap coloring.} -} -\value{ -Exports a NGCHM file named infercnv.ngchm and saves it to the output directory given to infercnv. -} -\description{ -Create highly interactive heat maps for single cell expression data using -Next Generation Clustered Heat Map (NG-CHM). NG-CHM was developed and -maintained by MD Anderson Department of Bioinformatics and Computational -Biology in collaboration with In Silico Solutions. 
-} diff --git a/man/MCMC_inferCNV-class.Rd b/man/MCMC_inferCNV-class.Rd new file mode 100644 index 00000000..f1a6a154 --- /dev/null +++ b/man/MCMC_inferCNV-class.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{class} +\name{MCMC_inferCNV-class} +\alias{MCMC_inferCNV-class} +\alias{MCMC_inferCNV} +\title{MCMC_inferCNV class} +\value{ +Returns a MCMC_inferCNV_obj +} +\description{ +Uses Markov Chain Monte Carlo (MCMC) and Gibbs sampling to estimate the posterior +probability of being in one of six Copy Number Variation states (states: 0, 0.5, 1, 1.5, 2, 3) for CNVs identified by +inferCNV's HMM. Posterior probabilities are found for the entire CNV cluster and each individual +cell line in the CNV. +} +\section{Slots}{ + +\describe{ +\item{\code{bugs_model}}{BUGS model.} + +\item{\code{sig}}{fitted values for cell lines, 1/standard deviation to be used for determining the distribution of each cell line} + +\item{\code{mu}}{Mean values to be used for determining the distribution of each cell line} + +\item{\code{group_id}}{IDs given to the cell clusters.} + +\item{\code{cell_gene}}{List containing the Cells and Genes that make up each CNV.} + +\item{\code{mcmc}}{Simulation output from sampling.} + +\item{\code{combined_mcmc}}{Combined chains for simulation output from sampling.} + +\item{\code{cnv_probabilities}}{Probabilities of each CNV belonging to a particular state from 0 (least likely) to 1 (most likely).} + +\item{\code{cell_probabilities}}{Probabilities of each cell being in a particular state, from 0 (least likely) to 1 (most likely).} + +\item{\code{args}}{Input arguments given by the user} + +\item{\code{cnv_regions}}{ID for each CNV found by the HMM} + +\item{\code{States}}{States that are identified and (depending on posterior MCMC input methods) modified.} +}} + diff --git a/man/MeanSD-method.Rd b/man/MeanSD-method.Rd new file mode 100644 index 00000000..5c6c0798 --- 
/dev/null +++ b/man/MeanSD-method.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{MeanSD} +\alias{MeanSD} +\alias{MeanSD,MCMC_inferCNV-method} +\title{Get the cell Mean and Standard Deviation for identified cnv regions} +\usage{ +MeanSD(obj) + +\S4method{MeanSD}{MCMC_inferCNV}(obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. +} +\description{ +Get the cell Mean and Standard Deviation for identified cnv regions +} diff --git a/man/anscombe_transform.Rd b/man/anscombe_transform.Rd index 26eab55b..e9fd926d 100644 --- a/man/anscombe_transform.Rd +++ b/man/anscombe_transform.Rd @@ -9,6 +9,9 @@ anscombe_transform(infercnv_obj) \arguments{ \item{infercnv_obj}{infercnv_object} } +\value{ +infercnv_obj +} \description{ Performs Anscombe's transformation: y = 2 * sqrt(x + 3/8) diff --git a/man/apply_max_threshold_bounds.Rd b/man/apply_max_threshold_bounds.Rd index 7d29986b..af4f6d3a 100644 --- a/man/apply_max_threshold_bounds.Rd +++ b/man/apply_max_threshold_bounds.Rd @@ -11,6 +11,9 @@ apply_max_threshold_bounds(infercnv_obj, threshold) \item{threshold}{value to threshold the data} } +\value{ +infercnv_obj +} \description{ Assumes centered at zero and sets bounds to +- threshold value. 
} diff --git a/man/apply_median_filtering.Rd b/man/apply_median_filtering.Rd new file mode 100644 index 00000000..1b020947 --- /dev/null +++ b/man/apply_median_filtering.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/noise_reduction.R +\name{apply_median_filtering} +\alias{apply_median_filtering} +\title{apply_median_filtering} +\usage{ +apply_median_filtering(infercnv_obj, window_size = 7, + on_observations = TRUE, on_references = TRUE) +} +\arguments{ +\item{infercnv_obj}{infercnv_object} + +\item{window_size}{Size of the window side centered on the data point to filter (default = 7).} + +\item{on_observations}{boolean (default=TRUE), run on observations data (tumor cells).} + +\item{on_references}{boolean (default=TRUE), run on references (normal cells).} +} +\value{ +infercnv_obj with median filtering applied to observations +} +\description{ +Apply a median filtering to the expression matrix within each tumor bounds +} diff --git a/man/assign_HMM_states_to_proxy_expr_vals.Rd b/man/assign_HMM_states_to_proxy_expr_vals.Rd new file mode 100644 index 00000000..cf2e292e --- /dev/null +++ b/man/assign_HMM_states_to_proxy_expr_vals.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_HMM.R +\name{assign_HMM_states_to_proxy_expr_vals} +\alias{assign_HMM_states_to_proxy_expr_vals} +\title{assign_HMM_states_to_proxy_expr_vals} +\usage{ +assign_HMM_states_to_proxy_expr_vals(infercnv_obj) +} +\arguments{ +\item{infercnv_obj}{infercnv object} +} +\value{ +infercnv_obj +} +\description{ +Replaces the HMM state assignments with the cnv levels they represent. 
+} diff --git a/man/cellGene-method.Rd b/man/cellGene-method.Rd new file mode 100644 index 00000000..de73945f --- /dev/null +++ b/man/cellGene-method.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{cellGene} +\alias{cellGene} +\alias{cellGene,MCMC_inferCNV-method} +\title{Access the values for cellGene} +\usage{ +cellGene(obj) + +\S4method{cellGene}{MCMC_inferCNV}(obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} +} +\value{ +A list. +} +\description{ +This function returns the list of values in cellGene +} diff --git a/man/clear_noise.Rd b/man/clear_noise.Rd index deef196b..275efa21 100644 --- a/man/clear_noise.Rd +++ b/man/clear_noise.Rd @@ -4,12 +4,14 @@ \alias{clear_noise} \title{clear_noise()} \usage{ -clear_noise(infercnv_obj, threshold) +clear_noise(infercnv_obj, threshold, noise_logistic = FALSE) } \arguments{ \item{infercnv_obj}{infercnv_object} \item{threshold}{values within reference mean +- threshold are set to zero.} + +\item{noise_logistic}{uses a logistic (sigmoidal) function for noise removal.} } \value{ infercnv_obj diff --git a/man/clear_noise_via_ref_mean_sd.Rd b/man/clear_noise_via_ref_mean_sd.Rd index 5624e7d8..bd15af8e 100644 --- a/man/clear_noise_via_ref_mean_sd.Rd +++ b/man/clear_noise_via_ref_mean_sd.Rd @@ -4,13 +4,16 @@ \alias{clear_noise_via_ref_mean_sd} \title{clear_noise_via_ref_mean_sd()} \usage{ -clear_noise_via_ref_mean_sd(infercnv_obj, sd_amplifier = 1.5) +clear_noise_via_ref_mean_sd(infercnv_obj, sd_amplifier = 1.5, + noise_logistic = FALSE) } \arguments{ \item{infercnv_obj}{infercnv_object} \item{sd_amplifier}{multiplicative factor applied to the standard deviation to alter the noise range (default: 1.5)} + +\item{noise_logistic}{uses a logistic (sigmoidal) function for noise removal.} } \description{ Define noise based on the standard deviation of the reference cell expression data. 
diff --git a/man/compute_normalization_factor.Rd b/man/compute_normalization_factor.Rd deleted file mode 100644 index 10179320..00000000 --- a/man/compute_normalization_factor.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV_ops.R -\name{compute_normalization_factor} -\alias{compute_normalization_factor} -\title{compute_normalization_factor()} -\usage{ -compute_normalization_factor(infercnv_obj) -} -\arguments{ -\item{infercnv_obj}{infercnv_object} -} -\value{ -normalization_factor -} -\description{ -computes norm factor as: - normalize_factor = 10^round(log10(mean(cs))) -} diff --git a/man/determine_mean_delta_via_Z.Rd b/man/determine_mean_delta_via_Z.Rd new file mode 100644 index 00000000..563d1f30 --- /dev/null +++ b/man/determine_mean_delta_via_Z.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_i3HMM.R +\name{determine_mean_delta_via_Z} +\alias{determine_mean_delta_via_Z} +\title{determine_mean_delta_via_Z} +\usage{ +determine_mean_delta_via_Z(sigma, p) +} +\arguments{ +\item{sigma}{standard deviation for a Normal distribution} + +\item{p}{the p-value at which the distributions should intersect} +} +\value{ +delta_for_alt_mean +} +\description{ +determine means for amp/del distributions requiring that they cross the + given distribution based on sigma centered at zero and at the given p value +} diff --git a/man/dot-i3HMM_get_sd_trend_by_num_cells_fit.Rd b/man/dot-i3HMM_get_sd_trend_by_num_cells_fit.Rd new file mode 100644 index 00000000..d2ef307b --- /dev/null +++ b/man/dot-i3HMM_get_sd_trend_by_num_cells_fit.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_i3HMM.R +\name{.i3HMM_get_sd_trend_by_num_cells_fit} +\alias{.i3HMM_get_sd_trend_by_num_cells_fit} +\title{.i3HMM_get_sd_trend_by_num_cells_fit} +\usage{ +.i3HMM_get_sd_trend_by_num_cells_fit(infercnv_obj, i3_p_val = 
0.05, + plot = FALSE) +} +\arguments{ +\item{infercnv_obj}{infercnv object} + +\item{i3_p_val}{the p-value to use for defining the position of means for the alternate amp/del distributions.} + +\item{plot}{boolean, set to TRUE to plot the mean/var fit.} +} +\value{ +normal_sd_trend list +} +\description{ +Determines the characteristics for the tumor cell residual intensities, including + fitting the variance in mean intensity as a function of number of cells sampled. +} diff --git a/man/filterHighPNormals.Rd b/man/filterHighPNormals.Rd new file mode 100644 index 00000000..71218e3f --- /dev/null +++ b/man/filterHighPNormals.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\name{filterHighPNormals} +\alias{filterHighPNormals} +\title{filterHighPNormals: Filter the HMM identified CNV's by the CNV's posterior probability +of belonging to a normal state.} +\usage{ +filterHighPNormals(MCMC_inferCNV_obj, BayesMaxPNormal) +} +\arguments{ +\item{MCMC_inferCNV_obj}{MCMC inferCNV object.} + +\item{BayesMaxPNormal}{Option to filter CNV or cell lines by some probability threshold.} +} +\value{ +Returns a MCMC_inferCNV_obj with removed CNV's. +} +\description{ +The following function will filter the HMM identified CNV's by the CNV's posterior +probability of belonging to a normal state identified by the function inferCNVBayesNet(). Will filter +CNV's based on a user desired threshold probability. Any CNV with a probability of being normal above +the threshold will be removed.
+} diff --git a/man/generate_cnv_region_reports.Rd b/man/generate_cnv_region_reports.Rd new file mode 100644 index 00000000..5ee88b5f --- /dev/null +++ b/man/generate_cnv_region_reports.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_HMM.R +\name{generate_cnv_region_reports} +\alias{generate_cnv_region_reports} +\title{generate_cnv_region_reports} +\usage{ +generate_cnv_region_reports(infercnv_obj, output_filename_prefix, out_dir, + ignore_neutral_state = NA, by = c("consensus", "subcluster", "cell")) +} +\arguments{ +\item{infercnv_obj}{infercnv object} + +\item{output_filename_prefix}{prefix for output filename} + +\item{out_dir}{output directory for report files to be written} + +\item{ignore_neutral_state}{numeric value representing the neutral state, which should be excluded from reporting (default: NA)} + +\item{by}{options("consensus", "subcluster", "cell"), determines the granularity at which to report +the CNV regions. 
Ideally, set to the same level at which the HMM predictions were performed.} +} +\value{ +None +} +\description{ +writes the CNV region report files +} diff --git a/man/getGenesCells-method.Rd b/man/getGenesCells-method.Rd new file mode 100644 index 00000000..49fe4eb2 --- /dev/null +++ b/man/getGenesCells-method.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{getGenesCells} +\alias{getGenesCells} +\alias{getGenesCells,MCMC_inferCNV-method} +\title{Create a list that holds Genes and Cells for each separate identified CNV} +\usage{ +getGenesCells(obj, pred_cnv_genes_df, cell_groups_df) + +\S4method{getGenesCells}{MCMC_inferCNV}(obj, pred_cnv_genes_df, + cell_groups_df) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} + +\item{pred_cnv_genes_df}{Data for genes in each predicted CNV.} + +\item{cell_groups_df}{Data for each cell in the predicted CNV's.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. +} +\description{ +Create a list that holds Genes and Cells for each separate identified CNV +} diff --git a/man/getProbabilities-method.Rd b/man/getProbabilities-method.Rd new file mode 100644 index 00000000..f8a5ff0c --- /dev/null +++ b/man/getProbabilities-method.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{getProbabilities} +\alias{getProbabilities} +\alias{getProbabilities,MCMC_inferCNV-method} +\title{Set the probabilities for each CNV belonging to each state as well as probability of each cell belonging to a states} +\usage{ +getProbabilities(obj) + +\S4method{getProbabilities}{MCMC_inferCNV}(obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. 
+} +\description{ +Set the probabilities for each CNV belonging to each state as well as probability of each cell belonging to a states +} diff --git a/man/getStates-method.Rd b/man/getStates-method.Rd new file mode 100644 index 00000000..32baef6d --- /dev/null +++ b/man/getStates-method.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{getStates} +\alias{getStates} +\alias{getStates,MCMC_inferCNV-method} +\title{Get the state values from the inferCNV HMM object} +\usage{ +getStates(obj, HMM_obj) + +\S4method{getStates}{MCMC_inferCNV}(obj, HMM_obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} + +\item{HMM_obj}{The HMM inferCNV object.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. +} +\description{ +Get the state values from the inferCNV HMM object +} diff --git a/man/get_DE_genes_basic.Rd b/man/get_DE_genes_basic.Rd index 0f8cb3b1..c548891f 100644 --- a/man/get_DE_genes_basic.Rd +++ b/man/get_DE_genes_basic.Rd @@ -4,7 +4,8 @@ \alias{get_DE_genes_basic} \title{get_DE_genes_basic} \usage{ -get_DE_genes_basic(infercnv_obj, p_val_thresh = 0.05, test.use = "wilcoxon") +get_DE_genes_basic(infercnv_obj, p_val_thresh = 0.05, + test.use = "wilcoxon") } \arguments{ \item{infercnv_obj}{infercnv object} diff --git a/man/get_hspike_cnv_mean_sd_trend_by_num_cells_fit.Rd b/man/get_hspike_cnv_mean_sd_trend_by_num_cells_fit.Rd new file mode 100644 index 00000000..24c5f887 --- /dev/null +++ b/man/get_hspike_cnv_mean_sd_trend_by_num_cells_fit.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_HMM.R +\name{get_hspike_cnv_mean_sd_trend_by_num_cells_fit} +\alias{get_hspike_cnv_mean_sd_trend_by_num_cells_fit} +\title{get_hspike_cnv_mean_sd_trend_by_num_cells_fit} +\usage{ +get_hspike_cnv_mean_sd_trend_by_num_cells_fit(hspike_obj, plot = FALSE) +} +\arguments{ +\item{hspike_obj}{hidden spike object} + 
+\item{plot}{(boolean flag, default FALSE)} +} +\value{ +cnv_level_to_mean_sd_fit list +} +\description{ +determine the number of cells - to - variance fit for each of the cnv levels. + +Different numbers of cells are randomly selected from the distribution of residual intensities at each +corresponding CNV level, the variance is computed, and a linear model is then fit. + +Note, this is similar to what is done in HoneyBadger, but has many differences to how they're doing it there, +which appears to involve cnv block length rather than cell number. Here, block size is not relevant, but rather +the number of cells in a pre-defined tumor subcluster. Also, values are extracted from our in silico spike-in. +} diff --git a/man/get_predicted_CNV_regions.Rd b/man/get_predicted_CNV_regions.Rd new file mode 100644 index 00000000..7b7ef3d8 --- /dev/null +++ b/man/get_predicted_CNV_regions.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_HMM.R +\name{get_predicted_CNV_regions} +\alias{get_predicted_CNV_regions} +\title{get_predicted_CNV_regions} +\usage{ +get_predicted_CNV_regions(infercnv_obj, by = c("consensus", "subcluster", + "cell")) +} +\arguments{ +\item{infercnv_obj}{infercnv object} + +\item{by}{options("consensus", "subcluster", "cell"), determines the granularity at which to report +the CNV regions. Ideally, set to the same level at which the HMM predictions were performed.} +} +\value{ +cnv_regions list +} +\description{ +Given the infercnv_obj containing the HMM state assignments in the expr.data slot, +retrieves a list of CNV regions.
+} diff --git a/man/get_spike_dists.Rd b/man/get_spike_dists.Rd new file mode 100644 index 00000000..95ce2acd --- /dev/null +++ b/man/get_spike_dists.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_HMM.R +\name{get_spike_dists} +\alias{get_spike_dists} +\title{get_spike_dists} +\usage{ +get_spike_dists(hspike_obj) +} +\arguments{ +\item{hspike_obj}{hidden spike object} +} +\value{ +cnv_mean_sd list +} +\description{ +determines the N(mean,sd) parameters for each of the CNV states based on +the in silico spike in data (hspike). +} diff --git a/man/i3HMM_assign_HMM_states_to_proxy_expr_vals.Rd b/man/i3HMM_assign_HMM_states_to_proxy_expr_vals.Rd new file mode 100644 index 00000000..5a8cda7b --- /dev/null +++ b/man/i3HMM_assign_HMM_states_to_proxy_expr_vals.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_i3HMM.R +\name{i3HMM_assign_HMM_states_to_proxy_expr_vals} +\alias{i3HMM_assign_HMM_states_to_proxy_expr_vals} +\title{i3HMM_assign_HMM_states_to_proxy_expr_vals} +\usage{ +i3HMM_assign_HMM_states_to_proxy_expr_vals(infercnv_obj) +} +\arguments{ +\item{infercnv_obj}{infercnv object} +} +\value{ +infercnv_obj +} +\description{ +replace i3 HMM state predictions with their represented CNV levels +} diff --git a/man/i3HMM_predict_CNV_via_HMM_on_indiv_cells.Rd b/man/i3HMM_predict_CNV_via_HMM_on_indiv_cells.Rd new file mode 100644 index 00000000..7cf73d9d --- /dev/null +++ b/man/i3HMM_predict_CNV_via_HMM_on_indiv_cells.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_i3HMM.R +\name{i3HMM_predict_CNV_via_HMM_on_indiv_cells} +\alias{i3HMM_predict_CNV_via_HMM_on_indiv_cells} +\title{i3HMM_predict_CNV_via_HMM_on_indiv_cells} +\usage{ +i3HMM_predict_CNV_via_HMM_on_indiv_cells(infercnv_obj, i3_p_val = 0.05, + sd_trend = .i3HMM_get_sd_trend_by_num_cells_fit(infercnv_obj, + i3_p_val), t = 1e-06, use_KS = 
TRUE) +} +\arguments{ +\item{infercnv_obj}{infercnv object} + +\item{i3_p_val}{p-value used to determine mean for amp/del distributions} + +\item{sd_trend}{(optional) by default, computed automatically based on infercnv_obj, i3_p_val} + +\item{t}{alt state transition probability (default: 1e-6)} + +\item{use_KS}{boolean : use the KS test statistic to determine mean for amp/del dist HBadger style (default: TRUE)} +} +\value{ +infercnv_obj where infercnv_obj@expr.data contains state assignments. +} +\description{ +use the i3 HMM for predicting CNV at the level of individual cells +} diff --git a/man/i3HMM_predict_CNV_via_HMM_on_tumor_subclusters.Rd b/man/i3HMM_predict_CNV_via_HMM_on_tumor_subclusters.Rd new file mode 100644 index 00000000..44eb4635 --- /dev/null +++ b/man/i3HMM_predict_CNV_via_HMM_on_tumor_subclusters.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_i3HMM.R +\name{i3HMM_predict_CNV_via_HMM_on_tumor_subclusters} +\alias{i3HMM_predict_CNV_via_HMM_on_tumor_subclusters} +\title{i3HMM_predict_CNV_via_HMM_on_tumor_subclusters} +\usage{ +i3HMM_predict_CNV_via_HMM_on_tumor_subclusters(infercnv_obj, + i3_p_val = 0.05, + sd_trend = .i3HMM_get_sd_trend_by_num_cells_fit(infercnv_obj, + i3_p_val), t = 1e-06, use_KS = TRUE) +} +\arguments{ +\item{infercnv_obj}{infercnv object} + +\item{i3_p_val}{p-value used to determine mean for amp/del distributions} + +\item{sd_trend}{(optional) by default, computed automatically based on infercnv_obj, i3_p_val} + +\item{t}{alt state transition probability (default: 1e-6)} + +\item{use_KS}{boolean : use the KS test statistic to determine mean for amp/del dist HBadger style (default: TRUE)} +} +\value{ +infercnv_obj where infercnv_obj@expr.data contains state assignments. 
+} +\description{ +use the i3 HMM for predicting CNV at the level of tumor subclusters +} diff --git a/man/i3HMM_predict_CNV_via_HMM_on_whole_tumor_samples.Rd b/man/i3HMM_predict_CNV_via_HMM_on_whole_tumor_samples.Rd new file mode 100644 index 00000000..8cacaaf2 --- /dev/null +++ b/man/i3HMM_predict_CNV_via_HMM_on_whole_tumor_samples.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_i3HMM.R +\name{i3HMM_predict_CNV_via_HMM_on_whole_tumor_samples} +\alias{i3HMM_predict_CNV_via_HMM_on_whole_tumor_samples} +\title{i3HMM_predict_CNV_via_HMM_on_whole_tumor_samples} +\usage{ +i3HMM_predict_CNV_via_HMM_on_whole_tumor_samples(infercnv_obj, + i3_p_val = 0.05, + sd_trend = .i3HMM_get_sd_trend_by_num_cells_fit(infercnv_obj, + i3_p_val), t = 1e-06, use_KS = TRUE) +} +\arguments{ +\item{infercnv_obj}{infercnv object} + +\item{i3_p_val}{p-value used to determine mean for amp/del distributions} + +\item{sd_trend}{(optional) by default, computed automatically based on infercnv_obj, i3_p_val} + +\item{t}{alt state transition probability (default: 1e-6)} + +\item{use_KS}{boolean : use the KS test statistic to determine mean for amp/del dist HBadger style (default: TRUE)} +} +\value{ +infercnv_obj where infercnv_obj@expr.data contains state assignments. 
+} +\description{ +use the i3 HMM for predicting CNV at the level of whole tumor samples +} diff --git a/man/inferCNVBayesNet.Rd b/man/inferCNVBayesNet.Rd new file mode 100644 index 00000000..f52ec13c --- /dev/null +++ b/man/inferCNVBayesNet.Rd @@ -0,0 +1,42 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\name{inferCNVBayesNet} +\alias{inferCNVBayesNet} +\title{inferCNVBayesNet: Run Bayesian Network Mixture Model To Obtain Posterior Probabilities For HMM Predicted States} +\usage{ +inferCNVBayesNet(file_dir, infercnv_obj, HMM_obj, out_dir, + model_file = system.file("BUGS_Mixture_Model", package = "infercnv"), + CORES = 1, postMcmcMethod = NULL, plotingProbs = TRUE, + quietly = TRUE, diagnostics = FALSE) +} +\arguments{ +\item{file_dir}{Location of the directory of the inferCNV outputs.} + +\item{infercnv_obj}{InferCNV object.} + +\item{HMM_obj}{InferCNV object with HMM states in expression data.} + +\item{out_dir}{(string) Path to where the output file should be saved to.} + +\item{model_file}{Path to the BUGS Model file.} + +\item{CORES}{Option to run parallel by specifying the number of cores to be used. (Default: 1)} + +\item{postMcmcMethod}{What actions to take after finishing the MCMC.} + +\item{plotingProbs}{Option for adding plots of Cell and CNV probabilities. (Default: TRUE)} + +\item{quietly}{Option to print descriptions along each step. (Default: TRUE)} + +\item{diagnostics}{Option to plot Diagnostic plots and tables. (Default: FALSE)} +} +\value{ +Returns a MCMC_inferCNV_obj and posterior probability of being in one of six Copy Number Variation states +(states: 0, 0.5, 1, 1.5, 2, 3) for CNV's identified by inferCNV's HMM. +} +\description{ +Uses Markov Chain Monte Carlo (MCMC) and Gibbs sampling to estimate the posterior +probability of being in one of six Copy Number Variation states (states: 0, 0.5, 1, 1.5, 2, 3) for CNV's identified by +inferCNV's HMM. 
Posterior probabilities are found for the entire CNV cluster and each individual +cell line in the CNV. +} diff --git a/man/infercnv-class.Rd b/man/infercnv-class.Rd index ed1f774f..26a4a07a 100644 --- a/man/infercnv-class.Rd +++ b/man/infercnv-class.Rd @@ -25,5 +25,9 @@ Slots in the infercnv object include: \item{\code{reference_grouped_cell_indices}}{ mapping [['group_name']] to c(cell column indices) for reference (normal) cells} \item{\code{observation_grouped_cell_indices}}{ mapping [['group_name']] to c(cell column indices) for observation (tumor) cells} + +\item{\code{tumor_subclusters}}{ stores subclustering of tumors if requested} + +\item{\code{.hspike}}{a hidden infercnv object populated with simulated spiked-in data} }} diff --git a/man/initializeObject-method.Rd b/man/initializeObject-method.Rd new file mode 100644 index 00000000..a2b7aeaa --- /dev/null +++ b/man/initializeObject-method.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{initializeObject} +\alias{initializeObject} +\alias{initializeObject,MCMC_inferCNV-method} +\title{Initialize the MCMC_inferCNV_obj object} +\usage{ +initializeObject(obj, args_parsed, infercnv_obj) + +\S4method{initializeObject}{MCMC_inferCNV}(obj, args_parsed, infercnv_obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} + +\item{args_parsed}{The arguments given to the function.} + +\item{infercnv_obj}{InferCNV object.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. 
+} +\description{ +Initialize the MCMC_inferCNV_obj object +} diff --git a/man/mask_non_DE_genes_basic.Rd b/man/mask_non_DE_genes_basic.Rd index c5a3bdbb..f98939b6 100644 --- a/man/mask_non_DE_genes_basic.Rd +++ b/man/mask_non_DE_genes_basic.Rd @@ -5,7 +5,8 @@ \title{mask_non_DE_genes_basic()} \usage{ mask_non_DE_genes_basic(infercnv_obj, p_val_thresh = 0.05, - test.use = "wilcoxon", center_val = mean(infercnv_obj@expr.data)) + test.use = "wilcoxon", center_val = mean(infercnv_obj@expr.data), + require_DE_all_normals = "any") } \arguments{ \item{infercnv_obj}{infercnv object} @@ -15,6 +16,8 @@ mask_non_DE_genes_basic(infercnv_obj, p_val_thresh = 0.05, \item{test.use}{statistical test to use. (default: "wilcoxon") alternatives include 'perm' or 't'.} \item{center_val}{value to assign to those genes that are not found to be statistically DE.} + +\item{require_DE_all_normals}{mask gene if found significantly DE in each normal comparison (default="any") options("any", "most", "all")} } \value{ infercnv_obj diff --git a/man/mcmcDiagnosticPlots-method.Rd b/man/mcmcDiagnosticPlots-method.Rd new file mode 100644 index 00000000..d5fd12d9 --- /dev/null +++ b/man/mcmcDiagnosticPlots-method.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{mcmcDiagnosticPlots} +\alias{mcmcDiagnosticPlots} +\alias{mcmcDiagnosticPlots,MCMC_inferCNV-method} +\title{Create Diagnostic Plots And Summaries.} +\usage{ +mcmcDiagnosticPlots(obj) + +\S4method{mcmcDiagnosticPlots}{MCMC_inferCNV}(obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. +} +\description{ +Create Diagnostic Plots And Summaries in order to determine if convergence has occurred.
+} diff --git a/man/ngchm.Rd b/man/ngchm.Rd deleted file mode 100644 index acf41a9c..00000000 --- a/man/ngchm.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV_ops.R -\name{ngchm} -\alias{ngchm} -\title{ngchm() : generates next gen heatmap} -\usage{ -ngchm(infercnv_obj, out_dir = ".", title = "NGCHM", gene_symbol = NULL, - path_to_shaidyMapGen = NULL, x.range = NA, x.center = NA) -} -\arguments{ -\item{infercnv_obj}{An infercnv object} - -\item{out_dir}{output directory (default: '.')} - -\item{title}{title of the interactive heatmap (default: "NGCHM")} - -\item{gene_symbol}{##TODO (default: NULL)} - -\item{path_to_shaidyMapGen}{path to the shaidyMapGen jar file (default: NULL)} - -\item{x.range}{(integer) Values for minimum and maximum thresholds for heatmap coloring.} - -\item{x.center}{(integer) Center expression value for heatmap coloring.} -} -\description{ -Function for Generating a next-generation heatmap -} diff --git a/man/nonParallel-method.Rd b/man/nonParallel-method.Rd new file mode 100644 index 00000000..065038f4 --- /dev/null +++ b/man/nonParallel-method.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{nonParallel} +\alias{nonParallel} +\alias{nonParallel,MCMC_inferCNV-method} +\title{Run simulations in Non-Parallel mode} +\usage{ +nonParallel(obj) + +\S4method{nonParallel}{MCMC_inferCNV}(obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. 
+} +\description{ +Run simulations in Non-Parallel mode +} diff --git a/man/normalize_counts_by_seq_depth.Rd b/man/normalize_counts_by_seq_depth.Rd index eb9f5c00..646fef69 100644 --- a/man/normalize_counts_by_seq_depth.Rd +++ b/man/normalize_counts_by_seq_depth.Rd @@ -11,10 +11,12 @@ normalize_counts_by_seq_depth(infercnv_obj, normalize_factor = NA) \item{normalize_factor}{total counts to scale the normalization to (default: NA, computed as described above)} } +\value{ +infercnv_obj +} \description{ Normalizes count data by total sum scaling For single cell data, a typical normalization factor is 1e5, providing counts per 100k total counts. -If a normalization factor is not provided, one is estimated based on: - 10^round(log10(mean(column_sums))) +If a normalization factor is not provided, the median library size is used. } diff --git a/man/plotProbabilities-method.Rd b/man/plotProbabilities-method.Rd new file mode 100644 index 00000000..3e13519e --- /dev/null +++ b/man/plotProbabilities-method.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{plotProbabilities} +\alias{plotProbabilities} +\alias{plotProbabilities,MCMC_inferCNV-method} +\title{Plots the probability for each cnv belonging to a specific state and the probability of +each cell line belonging to a specific state.} +\usage{ +plotProbabilities(obj) + +\S4method{plotProbabilities}{MCMC_inferCNV}(obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. +} +\description{ +Plots the probability for each cnv belonging to a specific state and the probability of +each cell line belonging to a specific state.
+} diff --git a/man/plot_cnv.Rd b/man/plot_cnv.Rd index 8aa15880..5ae53c50 100644 --- a/man/plot_cnv.Rd +++ b/man/plot_cnv.Rd @@ -7,9 +7,10 @@ plot_cnv(infercnv_obj, out_dir = ".", title = "inferCNV", obs_title = "Observations (Cells)", ref_title = "References (Cells)", cluster_by_groups = TRUE, k_obs_groups = 3, contig_cex = 1, - x.center = 0, x.range = NA, hclust_method = "ward.D", - color_safe_pal = TRUE, output_filename = "infercnv", - output_format = "png", ref_contig = NULL, write_expr_matrix = FALSE) + x.center = mean(infercnv_obj@expr.data), x.range = "auto", + hclust_method = "ward.D", color_safe_pal = FALSE, + output_filename = "infercnv", output_format = "png", png_res = 300, + dynamic_resize = 0, ref_contig = NULL, write_expr_matrix = FALSE) } \arguments{ \item{infercnv_obj}{infercnv object} @@ -41,12 +42,18 @@ plot_cnv(infercnv_obj, out_dir = ".", title = "inferCNV", \item{output_format}{format for heatmap image file (default: 'png'), options('png', 'pdf', NA) If set to NA, will print graphics natively} +\item{png_res}{Resolution for png output.} + +\item{dynamic_resize}{Factor (>= 0) by which to scale the dynamic resize of the observation +heatmap and the overall plot based on how many cells there are. +Default is 0, which disables the scaling. Try 1 first if you want to enable.} + \item{ref_contig}{If given, will focus cluster on only genes in this contig.} \item{write_expr_matrix}{Includes writing a matrix file containing the expression data that is plotted in the heatmap.} } \value{ -No return, void. +A list of all relevant settings used for the plotting to be able to reuse them in another plot call while keeping consistent plotting settings, most importantly x.range. } \description{ Formats the data and sends it for plotting.
diff --git a/man/postProbNormal-method.Rd b/man/postProbNormal-method.Rd new file mode 100644 index 00000000..6b2f2f2b --- /dev/null +++ b/man/postProbNormal-method.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{postProbNormal} +\alias{postProbNormal} +\alias{postProbNormal,MCMC_inferCNV-method} +\title{Get the probability of each cnv being a normal state and plot these probabilities.} +\usage{ +postProbNormal(obj, PNormal) + +\S4method{postProbNormal}{MCMC_inferCNV}(obj, PNormal) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} + +\item{PNormal}{Option to add specific title to plot.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. +} +\description{ +Get the probability of each cnv being a normal state and plot these probabilities. +} diff --git a/man/predict_CNV_via_HMM_on_indiv_cells.Rd b/man/predict_CNV_via_HMM_on_indiv_cells.Rd new file mode 100644 index 00000000..39279a6e --- /dev/null +++ b/man/predict_CNV_via_HMM_on_indiv_cells.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_HMM.R +\name{predict_CNV_via_HMM_on_indiv_cells} +\alias{predict_CNV_via_HMM_on_indiv_cells} +\title{predict_CNV_via_HMM_on_indiv_cells} +\usage{ +predict_CNV_via_HMM_on_indiv_cells(infercnv_obj, + cnv_mean_sd = get_spike_dists(infercnv_obj@.hspike), t = 1e-06) +} +\arguments{ +\item{infercnv_obj}{infercnv object} + +\item{cnv_mean_sd}{(optional, by default automatically computed based in the infercnv_obj@.hspike object)} + +\item{t}{HMM alt state transition probability (default=1e-6)} +} +\value{ +infercnv_obj where the infercnv_obj@expr.data are replaced with the HMM state assignments. 
+} +\description{ +predict CNV levels at the individual cell level, using the i6 HMM +} diff --git a/man/predict_CNV_via_HMM_on_tumor_subclusters.Rd b/man/predict_CNV_via_HMM_on_tumor_subclusters.Rd new file mode 100644 index 00000000..39a58dbb --- /dev/null +++ b/man/predict_CNV_via_HMM_on_tumor_subclusters.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_HMM.R +\name{predict_CNV_via_HMM_on_tumor_subclusters} +\alias{predict_CNV_via_HMM_on_tumor_subclusters} +\title{predict_CNV_via_HMM_on_tumor_subclusters} +\usage{ +predict_CNV_via_HMM_on_tumor_subclusters(infercnv_obj, + cnv_mean_sd = get_spike_dists(infercnv_obj@.hspike), + cnv_level_to_mean_sd_fit = get_hspike_cnv_mean_sd_trend_by_num_cells_fit(infercnv_obj@.hspike), + t = 1e-06) +} +\arguments{ +\item{infercnv_obj}{infercnv object} + +\item{cnv_mean_sd}{(optional, by default automatically computed based in the infercnv_obj@.hspike object)} + +\item{cnv_level_to_mean_sd_fit}{(optional, by default automatically computed based on get_hspike_cnv_mean_sd_trend_by_num_cells_fit(infercnv_obj@.hspike)} + +\item{t}{HMM alt state transition probability (default=1e-6)} +} +\value{ +infercnv_obj where the infercnv_obj@expr.data are replaced with the HMM state assignments. 
+} +\description{ +predict CNV levels at the tumor subcluster level, using the i6 HMM +} diff --git a/man/predict_CNV_via_HMM_on_whole_tumor_samples.Rd b/man/predict_CNV_via_HMM_on_whole_tumor_samples.Rd new file mode 100644 index 00000000..bc2a930e --- /dev/null +++ b/man/predict_CNV_via_HMM_on_whole_tumor_samples.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_HMM.R +\name{predict_CNV_via_HMM_on_whole_tumor_samples} +\alias{predict_CNV_via_HMM_on_whole_tumor_samples} +\title{predict_CNV_via_HMM_on_whole_tumor_samples} +\usage{ +predict_CNV_via_HMM_on_whole_tumor_samples(infercnv_obj, + cnv_mean_sd = get_spike_dists(infercnv_obj@.hspike), + cnv_level_to_mean_sd_fit = get_hspike_cnv_mean_sd_trend_by_num_cells_fit(infercnv_obj@.hspike), + t = 1e-06) +} +\arguments{ +\item{infercnv_obj}{infercnv object} + +\item{cnv_mean_sd}{(optional, by default automatically computed based in the infercnv_obj@.hspike object)} + +\item{cnv_level_to_mean_sd_fit}{(optional, by default automatically computed based on get_hspike_cnv_mean_sd_trend_by_num_cells_fit(infercnv_obj@.hspike)} + +\item{t}{HMM alt state transition probability (default=1e-6)} +} +\value{ +infercnv_obj where the infercnv_obj@expr.data are replaced with the HMM state assignments. +} +\description{ +predict CNV levels at the tumor sample level, using the i6 HMM +} diff --git a/man/removeCNV-method.Rd b/man/removeCNV-method.Rd new file mode 100644 index 00000000..042fac34 --- /dev/null +++ b/man/removeCNV-method.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{removeCNV} +\alias{removeCNV} +\alias{removeCNV,MCMC_inferCNV-method} +\title{Run simulations and remove CNV's that have a probability of being normal above a set thresholld. 
+This removes possible false positives identified by the HMM.} +\usage{ +removeCNV(obj) + +\S4method{removeCNV}{MCMC_inferCNV}(obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. +} +\description{ +Run simulations and remove CNV's that have a probability of being normal above a set threshold. +This removes possible false positives identified by the HMM. +} diff --git a/man/removeCells-method.Rd b/man/removeCells-method.Rd new file mode 100644 index 00000000..1c2c97e0 --- /dev/null +++ b/man/removeCells-method.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{removeCells} +\alias{removeCells} +\alias{removeCells,MCMC_inferCNV-method} +\title{Run simulations and remove cells from cnv's that are predicted to be normal} +\usage{ +removeCells(obj) + +\S4method{removeCells}{MCMC_inferCNV}(obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. +} +\description{ +Run simulations and remove cells from cnv's that are predicted to be normal +} diff --git a/man/remove_genes_at_ends_of_chromosomes.Rd b/man/remove_genes_at_ends_of_chromosomes.Rd index ab6fe364..46c7f0e0 100644 --- a/man/remove_genes_at_ends_of_chromosomes.Rd +++ b/man/remove_genes_at_ends_of_chromosomes.Rd @@ -11,6 +11,9 @@ remove_genes_at_ends_of_chromosomes(infercnv_obj, window_length) \item{window_length}{length of the window to use.} } +\value{ +infercnv_obj +} \description{ Removes genes that are within window_length/2 of the ends of each chromosome.
} diff --git a/man/remove_spike.Rd b/man/remove_spike.Rd deleted file mode 100644 index 881dab64..00000000 --- a/man/remove_spike.Rd +++ /dev/null @@ -1,17 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV_spike.R -\name{remove_spike} -\alias{remove_spike} -\title{remove_spike()} -\usage{ -remove_spike(infercnv_obj) -} -\arguments{ -\item{infercnv_obj}{An infercnv object populated with raw count data} -} -\value{ -infercnv_obj -} -\description{ -Removes the spiked-in group named 'SPIKE' from the infercnv_obj -} diff --git a/man/returningInferCNV-method.Rd b/man/returningInferCNV-method.Rd new file mode 100644 index 00000000..7e24002c --- /dev/null +++ b/man/returningInferCNV-method.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{returningInferCNV} +\alias{returningInferCNV} +\alias{returningInferCNV,MCMC_inferCNV-method} +\title{Return the InferCNV Object with the new adjusted CNV's} +\usage{ +returningInferCNV(obj, infercnv_obj) + +\S4method{returningInferCNV}{MCMC_inferCNV}(obj, infercnv_obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} + +\item{infercnv_obj}{Current inferCNV object that will be adjusted based on the results of the Bayesian Network Model.} +} +\value{ +An inferCNV object +} +\description{ +Returns Infercnv Object +} diff --git a/man/run.Rd b/man/run.Rd index 9b4e8bc6..e72ab041 100644 --- a/man/run.Rd +++ b/man/run.Rd @@ -5,40 +5,121 @@ \title{run() : Invokes a routine inferCNV analysis to Infer CNV changes given a matrix of RNASeq counts.} \usage{ run(infercnv_obj, cutoff = 1, min_cells_per_gene = 3, out_dir = ".", - normalize_factor = NA, window_length = 101, num_ref_groups = NULL, - max_centered_threshold = NA, noise_filter = NA, sd_amplifier = 1.5, + window_length = 101, smooth_method = c("pyramidinal", "runmeans"), + num_ref_groups = NULL, ref_subtract_use_mean_bounds = TRUE,
cluster_by_groups = FALSE, k_obs_groups = 1, - outlier_method_bound = "average_bound", outlier_lower_bound = NA, - outlier_upper_bound = NA, hclust_method = "complete", - anscombe_normalize = TRUE, use_zscores = FALSE, - remove_genes_at_chr_ends = FALSE, mask_nonDE_genes = FALSE, - mask_nonDE_pval = 0.05, test.use = "wilcoxon", plot_steps = FALSE, - debug = FALSE, include.spike = FALSE, spike_in_chrs = NULL, - spike_in_multiplier_vec = NULL, pseudocount = 0) + hclust_method = "ward.D2", max_centered_threshold = 3, + scale_data = FALSE, HMM = FALSE, HMM_transition_prob = 1e-06, + HMM_report_by = c("subcluster", "consensus", "cell"), + HMM_type = c("i6", "i3"), HMM_i3_pval = 0.05, HMM_i3_use_KS = TRUE, + BayesMaxPNormal = 0.5, sim_method = "meanvar", + sim_foreground = FALSE, analysis_mode = c("samples", "subclusters", + "cells"), tumor_subcluster_partition_method = c("random_trees", + "qnorm", "pheight", "qgamma", "shc"), tumor_subcluster_pval = 0.1, + denoise = FALSE, noise_filter = NA, sd_amplifier = 1.5, + noise_logistic = FALSE, outlier_method_bound = "average_bound", + outlier_lower_bound = NA, outlier_upper_bound = NA, + final_scale_limits = NULL, final_center_val = NULL, debug = FALSE, + num_threads = 4, plot_steps = FALSE, resume_mode = TRUE, + png_res = 300, plot_probabilities = TRUE, diagnostics = FALSE, + remove_genes_at_chr_ends = FALSE, prune_outliers = FALSE, + mask_nonDE_genes = FALSE, mask_nonDE_pval = 0.05, + test.use = "wilcoxon", require_DE_all_normals = "any", + hspike_aggregate_normals = FALSE, no_plot = FALSE, + no_prelim_plot = FALSE) } \arguments{ \item{infercnv_obj}{An infercnv object populated with raw count data} \item{cutoff}{Cut-off for the min average read counts per gene among reference cells. (default: 1)} -\item{min_cells_per_gene}{minimum number of reference cells requiring expression measurements to include the corresponding gene. 
+\item{min_cells_per_gene}{minimum number of reference cells requiring expression measurements to include the corresponding gene. default: 3} -\item{out_dir}{path to directory to deposit outputs (default: '.')} +\item{out_dir}{path to directory to deposit outputs (default: '.') -\item{normalize_factor}{scaling factor for total sum of counts (default: NA, in which case -will be set = 10^round(log10(mean(colSums))), typically setting to 1e5} +## Smoothing params} \item{window_length}{Length of the window for the moving average (smoothing). Should be an odd integer. (default: 101)#'} +\item{smooth_method}{Method to use for smoothing: c(runmeans,pyramidinal) default: pyramidinal + +#####} + \item{num_ref_groups}{The number of reference groups or a list of indices for each group of reference indices in relation to reference_obs. (default: NULL)} +\item{ref_subtract_use_mean_bounds}{Determine means separately for each ref group, then remove intensities within bounds of means (default: TRUE) + Otherwise, uses mean of the means across groups. + +#############################} + +\item{cluster_by_groups}{If observations are defined according to groups (ie. patients), each group +of cells will be clustered separately. (default=FALSE, instead will use k_obs_groups setting)} + +\item{k_obs_groups}{Number of groups in which to break the observations. (default: 1)} + +\item{hclust_method}{Method used for hierarchical clustering of cells. Valid choices are: +"ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid". +default("ward.D2")} + \item{max_centered_threshold}{The maximum value a value can have after -centering. Also sets a lower bound of --1 * this value.} + centering. Also sets a lower bound of + -1 * this value. (default: 3), +can set to a numeric value or "auto" to bound by the mean bounds across cells. +Set to NA to turn off.} + +\item{scale_data}{perform Z-scaling of logtransformed data (default: FALSE). 
This may be turned on if you have + very different kinds of data for your normal and tumor samples. For example, you need to use GTEx + representative normal expression profiles rather than being able to leverage normal single cell data + that goes with your experiment. + +######################################################################### +## Downstream Analyses (HMM or non-DE-masking) based on tumor subclusters} + +\item{HMM}{when set to True, runs HMM to predict CNV level (default: FALSE)} + +\item{HMM_transition_prob}{transition probability in HMM (default: 1e-6)} + +\item{HMM_report_by}{cell, consensus, subcluster (default: subcluster) Note, reporting is performed entirely separately from the HMM prediction. So, you can predict on subclusters, but get per-cell level reporting (more voluminous output).} + +\item{HMM_type}{HMM model type. Options: (i6 or i3): +i6: infercnv 6-state model (0, 0.5, 1, 1.5, 2, >2) where state emissions are calibrated based on simulated CNV levels. +i3: infercnv 3-state model (del, neutral, amp) configured based on normal cells and HMM_i3_pval} + +\item{HMM_i3_pval}{p-value for HMM i3 state overlap (default: 0.05)} + +\item{HMM_i3_use_KS}{boolean: use the KS test statistic to estimate mean of amp/del distributions (ala HoneyBadger). (default=TRUE) + + +## Filtering low-conf HMM preds via BayesNet P(Normal)} + +\item{BayesMaxPNormal}{maximum P(Normal) allowed for a CNV prediction according to BayesNet. (default=0.5, note zero turns it off) + +###################### +## Tumor subclustering} + +\item{sim_method}{method for calibrating CNV levels in the i6 HMM (default: 'meanvar')} + +\item{sim_foreground}{don't use... for debugging, developer option.} + +\item{analysis_mode}{options(samples|subclusters|cells), Grouping level for image filtering or HMM predictions. +default: samples (fastest, but subclusters is ideal)} + +\item{tumor_subcluster_partition_method}{method for defining tumor subclusters. 
Options('random_trees', 'qnorm') +random_trees: (default) slow but best. Uses permutation statistics w/ tree construction. +qnorm: defines tree height based on the quantile defined by the tumor_subcluster_pval} + +\item{tumor_subcluster_pval}{max p-value for defining a significant tumor subcluster (default: 0.1) + + + +############################# +## de-noising parameters ####} + +\item{denoise}{If True, turns on denoising according to options below} \item{noise_filter}{Values +- from the reference cell mean will be set to zero (whitening effect) default(NA, instead will use sd_amplifier below.} @@ -46,45 +127,67 @@ default(NA, instead will use sd_amplifier below.} \item{sd_amplifier}{Noise is defined as mean(reference_cells) +- sdev(reference_cells) * sd_amplifier default: 1.0} -\item{cluster_by_groups}{If observations are defined according to groups (ie. patients), each group -of cells will be clustered separately. (default=FALSE, instead will use k_obs_groups setting)} +\item{noise_logistic}{use the noise_filter or sd_amplifier based threshold (whichever is invoked) as the midpoint in a + logistic model for downscaling values close to the mean. (default: FALSE) -\item{k_obs_groups}{Number of groups in which to break the observations. (default: 1)} + +################## +## Outlier pruning} \item{outlier_method_bound}{Method to use for bounding outlier values. (default: "average_bound") Will preferentially use outlier_lower_bounda and outlier_upper_bound if set.} \item{outlier_lower_bound}{Outliers below this lower bound will be set to this value.} -\item{outlier_upper_bound}{Outliers above this upper bound will be set to this value.} +\item{outlier_upper_bound}{Outliers above this upper bound will be set to this value. -\item{hclust_method}{Method used for hierarchical clustering of cells. 
Valid choices are: -"ward.D", "ward.D2", "single", "complete", "average", "mcquitty", "median", "centroid".} -\item{anscombe_normalize}{Perform anscombe normalization on normalized counts before log transformation.} +########################## +## Misc options} + +\item{final_scale_limits}{The scale limits for the final heatmap output by the run() method. Default "auto". Alt, c(low,high)} + +\item{final_center_val}{Center value for final heatmap output by the run() method.} + +\item{debug}{If true, output debug level logging.} + +\item{num_threads}{(int) number of threads for parallel steps (default: 4)} + +\item{plot_steps}{If true, saves infercnv objects and plots data at the intermediate steps.} + +\item{resume_mode}{leverage pre-computed and stored infercnv objects where possible. (default=TRUE)} + +\item{png_res}{Resolution for png output.} + +\item{plot_probabilities}{option to plot posterior probabilities (default: TRUE)} -\item{use_zscores}{If true, converts log(expression) data to zscores based on reference cell expr distribution.} +\item{diagnostics}{option to create diagnostic plots after running the Bayesian model (default: FALSE) -\item{remove_genes_at_chr_ends}{If true, removes the window_length/2 genes at both ends of the chromosome.} +####################### +## Experimental options} + +\item{remove_genes_at_chr_ends}{experimental option: If true, removes the window_length/2 genes at both ends of the chromosome.} + +\item{prune_outliers}{Define outliers loosely as those that exceed the mean boundaries among all cells. These are set to the bounds. 
+ +## experimental opts involving DE analysis} \item{mask_nonDE_genes}{If true, sets genes not significantly differentially expressed between tumor/normal to -the mean value for the complete data set} +the mean value for the complete data set (default: FALSE)} \item{mask_nonDE_pval}{p-value threshold for defining statistically significant DE genes between tumor/normal} \item{test.use}{statistical test to use. (default: "wilcoxon") alternatives include 'perm' or 't'.'} -\item{plot_steps}{If true, saves infercnv objects and plots data at the intermediate steps.} - -\item{debug}{If true, output debug level logging.} +\item{require_DE_all_normals}{If mask_nonDE_genes is set, those genes will be masked only if they are found as DE according to test.use and mask_nonDE_pval in each of the comparisons to normal cells options: {"any", "most", "all"} (default: "any") -\item{include.spike}{If true, introduces an artificial spike-in of data at ~0x and 2x for scaling residuals between 0-2. (default: F)} +other experimental opts} -\item{spike_in_chrs}{vector listing of chr names to use for modeling spike-ins (default: NULL - uses the two largest chrs. ex. c('chr1', 'chr2') )} +\item{hspike_aggregate_normals}{instead of trying to model the different normal groupings individually, just merge them in the hspike.} -\item{pseudocount}{Number of counts to add to each gene of each cell post-filtering of genes and cells and pre-total sum count normalization. (default: 0)} +\item{no_plot}{don't make any of the images. Instead, generate all non-image outputs as part of the run.
(default: FALSE)} -\item{spike_in_multiplier}{vector of weights matching spike_in_chrs (default: c(0.01, 2.0) for modeling loss/gain of both chrs)} +\item{no_prelim_plot}{don't make the preliminary infercnv image (default: FALSE)} } \value{ infercnv_obj containing filtered and transformed data diff --git a/man/runMCMC-method.Rd b/man/runMCMC-method.Rd new file mode 100644 index 00000000..f7be73e7 --- /dev/null +++ b/man/runMCMC-method.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{runMCMC} +\alias{runMCMC} +\alias{runMCMC,MCMC_inferCNV-method} +\title{Run simulations using rjags.} +\usage{ +runMCMC(obj) + +\S4method{runMCMC}{MCMC_inferCNV}(obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. +} +\description{ +Run MCMC simulations using rjags. Also returns a plot of the probability of each CNV being +normal before running any kind of post MCMC modification. +} diff --git a/man/scale_cnv_by_spike.Rd b/man/scale_cnv_by_spike.Rd deleted file mode 100644 index 5668c59c..00000000 --- a/man/scale_cnv_by_spike.Rd +++ /dev/null @@ -1,22 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV_spike.R -\name{scale_cnv_by_spike} -\alias{scale_cnv_by_spike} -\title{scale_cnv_by_spike()} -\usage{ -scale_cnv_by_spike(infercnv_obj) -} -\arguments{ -\item{infercnv_obj}{An infercnv object populated with raw count data} -} -\value{ -infercnv_obj -} -\description{ -Scales expression data according to the expression value bounds in the SPIKE group. -} -\details{ -Assumes data is centered at 1 -Expression below 1 is scaled according to the left spike bound set to zero. -Expression above 1 is scaled according to the right spike bound set to two.
-} diff --git a/man/scale_infercnv_expr.Rd b/man/scale_infercnv_expr.Rd new file mode 100644 index 00000000..966f7237 --- /dev/null +++ b/man/scale_infercnv_expr.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_ops.R +\name{scale_infercnv_expr} +\alias{scale_infercnv_expr} +\title{scale_infercnv_expr} +\usage{ +scale_infercnv_expr(infercnv_obj) +} +\arguments{ +\item{infercnv_obj}{infercnv object} +} +\value{ +infercnv_obj +} +\description{ +performs scaling to expression values for each cell, +assigning all values to a standard normal centered at zero. +} diff --git a/man/setBayesMaxPNormal-method.Rd b/man/setBayesMaxPNormal-method.Rd new file mode 100644 index 00000000..90f62d9d --- /dev/null +++ b/man/setBayesMaxPNormal-method.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{setBayesMaxPNormal} +\alias{setBayesMaxPNormal} +\alias{setBayesMaxPNormal,MCMC_inferCNV-method} +\title{Add the probability threshold for the arguments in the MCMC infercnv object.} +\usage{ +setBayesMaxPNormal(obj, BayesMaxPNormal) + +\S4method{setBayesMaxPNormal}{MCMC_inferCNV}(obj, BayesMaxPNormal) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} + +\item{BayesMaxPNormal}{probability to be used as a threshold for CNV or cell removal.} +} +\value{ +MCMC_inferCNV_obj S4 object. +} +\description{ +This function adds the variable BayesMaxPNormal to the arguments slot of the MCMC infercnv object.
+} diff --git a/man/smooth_by_chromosome_runmeans.Rd b/man/smooth_by_chromosome_runmeans.Rd new file mode 100644 index 00000000..3131ba59 --- /dev/null +++ b/man/smooth_by_chromosome_runmeans.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_ops.R +\name{smooth_by_chromosome_runmeans} +\alias{smooth_by_chromosome_runmeans} +\title{smooth_by_chromosome_runmeans} +\usage{ +smooth_by_chromosome_runmeans(infercnv_obj, window_length) +} +\arguments{ +\item{infercnv_obj}{infercnv object} + +\item{window_length}{window length to use for smoothing.} +} +\value{ +infercnv_obj +} +\description{ +uses the simpler caTools:runmeans() to perform smoothing operations. +} diff --git a/man/spike_in_variation_chrs.Rd b/man/spike_in_variation_chrs.Rd deleted file mode 100644 index fd44edbe..00000000 --- a/man/spike_in_variation_chrs.Rd +++ /dev/null @@ -1,27 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/inferCNV_spike.R -\name{spike_in_variation_chrs} -\alias{spike_in_variation_chrs} -\title{spike_in_variation_chrs() - -Adds a 'SPIKE'-in to the observations set at different thresholds of loss/gain to -aid in tracking the effect of infercnv operations and for defining the final scaling.} -\usage{ -spike_in_variation_chrs(infercnv_obj, spike_in_chrs = NULL, - spike_in_multiplier_vec = c(0.01, 2), max_cells = 100, - min_genes_per_chr = 100) -} -\arguments{ -\item{infercnv_obj}{An infercnv object populated with raw count data} - -\item{spike_in_chrs}{: define the chromsomes that will serve as signal for gain/loss -default: picks chrosomes in order of size} - -\item{spike_in_multiplier_vec}{: factors that define relative expression for gain/loss -and must match ordering of spike_in_chrs above -default: c(0.01, 2.0)} - -\item{max_cells}{max number of cells to incorporate in the spike-in} - -\item{min_genes_per_chr}{: default 100} -} diff --git a/man/split_references.Rd 
b/man/split_references.Rd index a8f83d89..d0327b41 100644 --- a/man/split_references.Rd +++ b/man/split_references.Rd @@ -4,7 +4,8 @@ \alias{split_references} \title{split_references()} \usage{ -split_references(infercnv_obj, num_groups = 2, hclust_method = "complete") +split_references(infercnv_obj, num_groups = 2, + hclust_method = "complete") } \arguments{ \item{infercnv_obj}{infercnv_object} diff --git a/man/subtract_ref_expr_from_obs.Rd b/man/subtract_ref_expr_from_obs.Rd index 5bab50e1..2de70a2b 100644 --- a/man/subtract_ref_expr_from_obs.Rd +++ b/man/subtract_ref_expr_from_obs.Rd @@ -4,12 +4,16 @@ \alias{subtract_ref_expr_from_obs} \title{subtract_ref_expr_from_obs()} \usage{ -subtract_ref_expr_from_obs(infercnv_obj, inv_log = FALSE) +subtract_ref_expr_from_obs(infercnv_obj, inv_log = FALSE, + use_bounds = TRUE) } \arguments{ \item{infercnv_obj}{infercnv_object} \item{inv_log}{mean values will be determined based on (2^x -1)} + +\item{use_bounds}{if multiple normal data sets are used, it takes the bounds of the means from each set for subtraction. +Alternatively, will use the mean( mean(normal) for each normal) default: TRUE} } \value{ infercnv_obj containing the reference subtracted values. diff --git a/man/withParallel-method.Rd b/man/withParallel-method.Rd new file mode 100644 index 00000000..8a60e817 --- /dev/null +++ b/man/withParallel-method.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inferCNV_BayesNet.R +\docType{methods} +\name{withParallel} +\alias{withParallel} +\alias{withParallel,MCMC_inferCNV-method} +\title{Run simulations in Parallel} +\usage{ +withParallel(obj) + +\S4method{withParallel}{MCMC_inferCNV}(obj) +} +\arguments{ +\item{obj}{The MCMC_inferCNV_obj S4 object.} +} +\value{ +obj The MCMC_inferCNV_obj S4 object. 
+} +\description{ +Run simulations in Parallel +} diff --git a/scripts/HB_example_to_inferCNV_obj.R b/scripts/HB_example_to_inferCNV_obj.R new file mode 100755 index 00000000..016f88d8 --- /dev/null +++ b/scripts/HB_example_to_inferCNV_obj.R @@ -0,0 +1,18 @@ +#!/usr/bin/env Rscript + +library(HoneyBADGER) +library(infercnv) + +data(gexp) ## tumor cells, dim: [6082,75] +data(ref) ## reference, length: 6082 + + + +raw.data = cbind(gexp, data.frame('GTEX'=ref)) + +cell.annots = data.frame(cell=colnames(gexp), type='tumor') +cell.annots = rbind(cell.annots, data.frame(cell='GTEX', type='normal')) + +write.table(raw.data, file="hb.example.matrix", quote=F, sep="\t") +write.table(cell.annots, file='hb.example.cell_annots', quote=F, sep="\t", col.names=F, row.names=F) + diff --git a/scripts/KS_matrix_comparison.R b/scripts/KS_matrix_comparison.R new file mode 100755 index 00000000..39461dc8 --- /dev/null +++ b/scripts/KS_matrix_comparison.R @@ -0,0 +1,66 @@ +#!/usr/bin/env Rscript + +set.seed(1234) + +suppressPackageStartupMessages(library("argparse")) + +library(tidyverse) + +parser = ArgumentParser() +parser$add_argument("--matrix1", required=T, nargs=1) +parser$add_argument("--matrix2", required=T, nargs=1) +parser$add_argument("--log", required=F, default=FALSE, action="store_true") +parser$add_argument("--output", required=T, nargs=1, help="output filename pdf") + +args = parser$parse_args() + + +#' learn distribution parameters: +data1 = as.matrix(read.table(args$matrix1, header=T, row.names=1)) +data2 = as.matrix(read.table(args$matrix2, header=T, row.names=1)) + + +png(args$output) +if (args$log) { + data1 = log(data1+1) + data2 = log(data2+1) +} + + +## plotting ideas borrowed from +## https://stackoverflow.com/questions/39162178/kolmogorov-smirnov-plot-in-r-ggplot + + +m1_ecdf = ecdf(data1) +m2_ecdf = ecdf(data2) +val_range = range(data1, data2) +step = (val_range[2] - val_range[1])/100 +vals = seq(val_range[1], val_range[2], step) + + +m1_cdf = m1_ecdf(vals) 
+m2_cdf = m2_ecdf(vals) + +cdfs = data.frame(vals, + m1_cdf, + m2_cdf) + +ks_point = which.max(abs(cdfs$m1_cdf - cdfs$m2_cdf)) +ks_point_info = cdfs[ks_point,] +##message("KS point info: ", paste(ks_point_info, collapse=', ')) + +cdfs = cdfs %>% gather('m1_cdf', 'm2_cdf', key='type', value='cdf') + + +ggplot(cdfs, aes(x=vals, y=cdf)) + + geom_line(aes(color=type, linetype=type)) + + geom_segment(aes(x=ks_point_info$vals, + y=ks_point_info$m1_cdf, + xend=ks_point_info$vals, + yend=ks_point_info$m2_cdf), color='magenta', size=2) + + ggtitle(sprintf("%s vs. %s KS", args$matrix1, args$matrix2)) + xlab("number") + ylab("cdf") + + + + + diff --git a/scripts/KS_matrix_comparison.use_infercnv_obj.R b/scripts/KS_matrix_comparison.use_infercnv_obj.R new file mode 100755 index 00000000..4b21e1f2 --- /dev/null +++ b/scripts/KS_matrix_comparison.use_infercnv_obj.R @@ -0,0 +1,71 @@ +#!/usr/bin/env Rscript + +set.seed(1234) + +suppressPackageStartupMessages(library("argparse")) + +library(tidyverse) + +parser = ArgumentParser() +parser$add_argument("--matrix1", required=T, nargs=1) +parser$add_argument("--infercnv_obj", required=T, nargs=1) +parser$add_argument("--log", required=F, default=FALSE, action="store_true") +parser$add_argument("--output", required=T, nargs=1, help="output filename pdf") + +args = parser$parse_args() + + +#' learn distribution parameters: +data1 = as.matrix(read.table(args$matrix1, header=T, row.names=1)) + + + +infercnv_obj_file = args$infercnv_obj +infercnv_obj = readRDS(infercnv_obj_file) +data2 = as.matrix(infercnv_obj@expr.data[, unlist(infercnv_obj@reference_grouped_cell_indices)]) + + +png(args$output) +if (args$log) { + data1 = log(data1+1) + data2 = log(data2+1) +} + + +## plotting ideas borrowed from +## https://stackoverflow.com/questions/39162178/kolmogorov-smirnov-plot-in-r-ggplot + + +m1_ecdf = ecdf(data1) +m2_ecdf = ecdf(data2) +val_range = range(data1, data2) +step = (val_range[2] - val_range[1])/100 +vals = seq(val_range[1], 
val_range[2], step) + + +m1_cdf = m1_ecdf(vals) +m2_cdf = m2_ecdf(vals) + +cdfs = data.frame(vals, + m1_cdf, + m2_cdf) + +ks_point = which.max(abs(cdfs$m1_cdf - cdfs$m2_cdf)) +ks_point_info = cdfs[ks_point,] +##message("KS point info: ", paste(ks_point_info, collapse=', ')) + +cdfs = cdfs %>% gather('m1_cdf', 'm2_cdf', key='type', value='cdf') + + +ggplot(cdfs, aes(x=vals, y=cdf)) + + geom_line(aes(color=type, linetype=type)) + + geom_segment(aes(x=ks_point_info$vals, + y=ks_point_info$m1_cdf, + xend=ks_point_info$vals, + yend=ks_point_info$m2_cdf), color='magenta', size=2) + + ggtitle(sprintf("%s vs. %s KS", args$matrix1, args$matrix2)) + xlab("number") + ylab("cdf") + + + + + diff --git a/scripts/QQ_matrix_comparison.R b/scripts/QQ_matrix_comparison.R new file mode 100755 index 00000000..e54e1dcd --- /dev/null +++ b/scripts/QQ_matrix_comparison.R @@ -0,0 +1,30 @@ +#!/usr/bin/env Rscript + +set.seed(1234) + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--matrix1", required=T, nargs=1) +parser$add_argument("--matrix2", required=T, nargs=1) +parser$add_argument("--log", required=F, default=FALSE, action="store_true") +parser$add_argument("--output", required=T, nargs=1, help="output filename png") + +args = parser$parse_args() + + +#' learn distribution parameters: +data1 = as.matrix(read.table(args$matrix1, header=T, row.names=1)) +data2 = as.matrix(read.table(args$matrix2, header=T, row.names=1)) + + +png(args$output) +if (args$log) { + data1 = log(data1+1) + data2 = log(data2+1) +} +qqplot(data1, data2) +abline(a=0,b=1, col='red') + + + diff --git a/scripts/apply_median_filtering.R b/scripts/apply_median_filtering.R new file mode 100755 index 00000000..de010c83 --- /dev/null +++ b/scripts/apply_median_filtering.R @@ -0,0 +1,25 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", 
required=TRUE, nargs=1) +parser$add_argument("--window_size", help="window size", required=FALSE, type='integer', default=11) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +infercnv_obj = infercnv:::.subcluster_tumors_general(infercnv_obj) + +mf_infercnv_obj = infercnv:::.apply_heatmap_median_filtering(infercnv_obj, window_size=args$window_size) + +saveRDS(mf_infercnv_obj, file=sprintf("%s-median_filtered.W%d.obj", infercnv_obj_file, args$window_size) ) + +plot_cnv(mf_infercnv_obj, output_filename=paste0(infercnv_obj_file, sprintf(".mf.W%d", args$window_size))) + + diff --git a/scripts/boxplot_cell_exprs.R b/scripts/boxplot_cell_exprs.R new file mode 100755 index 00000000..0fe04407 --- /dev/null +++ b/scripts/boxplot_cell_exprs.R @@ -0,0 +1,51 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +parser$add_argument("--log", help="log(x+1) transform expr", action='store_true', default=FALSE) + +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(tidyverse) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +expr.data = infercnv_obj@expr.data + +if (args$log) { + expr.data = log(expr.data+1) +} + +## build df of expr values. 
+cell_groups = c(infercnv_obj@reference_grouped_cell_indices, infercnv_obj@observation_grouped_cell_indices) + +cell_group_names = names(cell_groups) + + +pngname = sprintf("%s-boxplot.png", infercnv_obj_file) +png(pngname) + +expr_tibble = do.call(rbind, lapply(cell_group_names, function(cell_group_name) { + cell_group_expr = expr.data[, cell_groups[[ cell_group_name ]] ] + + cell_group_expr = as.tibble(cell_group_expr) + + cell_group_expr = cell_group_expr %>% gather(key='cellname', value='expr') + + cell_group_expr = cell_group_expr %>% mutate(group_name=cell_group_name) +})) + + + +p = expr_tibble %>% ggplot(aes(y=expr, x=cellname, color=group_name)) + geom_boxplot(outlier.shape=NA) + facet_wrap(~group_name, scales='free_x') + +plot(p) + +saveRDS(expr_tibble, 'my.tibble.obj') + diff --git a/scripts/check_matrix_format.py b/scripts/check_matrix_format.py index 6fd561aa..f763b6fa 100755 --- a/scripts/check_matrix_format.py +++ b/scripts/check_matrix_format.py @@ -1,8 +1,7 @@ #!/usr/bin/env python -""" -Coverts a square expression matrix to an R-format compatible expression matrix +"""Converts a square expression matrix to an R-format compatible expression matrix """ @@ -62,8 +61,8 @@ def convert_matrix_format(input_matrix, delimiter, output_name): # Parse arguments prsr_arguments = argparse.ArgumentParser(prog='check_matrix_format.py', - description='Coverts a square expression matrix to an R-compatible expression matrix.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + description=__doc__, # Use text from file summary up top + formatter_class=argparse.RawDescriptionHelpFormatter) # Add positional argument prsr_arguments.add_argument("--input_matrix", metavar="input_matrix", diff --git a/scripts/cross_cell_scaling_normalization.R b/scripts/cross_cell_scaling_normalization.R new file mode 100755 index 00000000..b2dfc2a9 --- /dev/null +++ b/scripts/cross_cell_scaling_normalization.R @@ -0,0 +1,32 @@ +#!/usr/bin/env Rscript + 
+suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +parser$add_argument("--log", help="log transform expr", action='store_true', default=FALSE) + +args = parser$parse_args() + +library(infercnv) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +expr.data = infercnv_obj@expr.data + + +## do upper quartile normalization +upper_quart = apply(expr.data, 2, quantile, probs=0.75) +mean_upper_quart = mean(upper_quart) +revised.expr.data = sweep(expr.data, 2, mean_upper_quart/upper_quart, "*") + +new_upper_quart = apply(revised.expr.data, 2, quantile, probs=0.75) + +print(new_upper_quart) + +infercnv_obj@expr.data = revised.expr.data + +saveRDS(infercnv_obj, 'rescaled.obj') + diff --git a/scripts/dropout_matrix_comparison.R b/scripts/dropout_matrix_comparison.R new file mode 100755 index 00000000..81d82929 --- /dev/null +++ b/scripts/dropout_matrix_comparison.R @@ -0,0 +1,80 @@ +#!/usr/bin/env Rscript + +set.seed(1234) + +suppressPackageStartupMessages(library("argparse")) + +library(tidyverse) +library(infercnv) + +parser = ArgumentParser() +parser$add_argument("--matrix1", required=T, nargs=1) +parser$add_argument("--matrix2", required=T, nargs=1) +parser$add_argument("--output", required=T, nargs=1, help="output filename pdf") + +args = parser$parse_args() + + +#' learn distribution parameters: +data1 = as.matrix(read.table(args$matrix1, header=T, row.names=1)) +data2 = as.matrix(read.table(args$matrix2, header=T, row.names=1)) + +## total sum normalize each: +median_cs = median(c(colSums(data1), colSums(data2))) # pool the column sums of both matrices; median()'s 2nd positional argument is na.rm, not more data +data1 <- infercnv:::.normalize_data_matrix_by_seq_depth(data1, median_cs) +data2 <- infercnv:::.normalize_data_matrix_by_seq_depth(data2, median_cs) + + +pdf(args$output) + + +data1.mean_vs_p0 <- infercnv:::.get_mean_vs_p0_from_matrix(data1) +data2.mean_vs_p0 <- infercnv:::.get_mean_vs_p0_from_matrix(data2)
+ +plot_mean_vs_p0_with_data <- function(title='title', mean_vs_p0_table) { # plots p0 against log(m + 1), overlays a logistic (nls) fit and a smoothing-spline fit, and returns the fitted points of both + + logm <- log(mean_vs_p0_table$m + 1) + p0 <- mean_vs_p0_table$p0 + + plot(logm, p0, pch='.', main=title) + + x_approx_mid <- median(logm[which(p0>0.2 & p0 < 0.8)]) + + x <- logm + y <- p0 + df <- data.frame(x,y) + + fit <- nls(y ~ infercnv:::.logistic(x, x0 = x0, k = k), data = df, + start = list(x0 = x_approx_mid, k = -1)) + + logistic_x <- x + logistic_y <- predict(fit, newdata=x) + points(x, logistic_y, col='green') + + ## also try fitting a spline + spline.fit <- smooth.spline(x,y) + spline.pts = predict(spline.fit, newdata=x) + points(spline.pts$x, spline.pts$y, col='magenta') + legend('topright', c('logistic', 'spline'), col=c('green', 'magenta'), pch=1) + + ret = list(logistic_x = logistic_x, + logistic_y = logistic_y, + spline_x = spline.pts$x, # '=' names the list element; '<-' here left it unnamed + spline_y = spline.pts$y) + + + return(ret) +} + + +p1 <- plot_mean_vs_p0_with_data(args$matrix1, data1.mean_vs_p0) +p2 <- plot_mean_vs_p0_with_data(args$matrix2, data2.mean_vs_p0) + + +## plot both logistics in a single plot +plot(p1$logistic_x, p1$logistic_y, col='blue') +points(p2$logistic_x, p2$logistic_y, col='magenta') +legend('topright', c(args$matrix1, args$matrix2), col=c('blue', 'magenta'), pch=1) + + + diff --git a/scripts/examine_dropout_logistic.R b/scripts/examine_dropout_logistic.R new file mode 100755 index 00000000..fec9cf25 --- /dev/null +++ b/scripts/examine_dropout_logistic.R @@ -0,0 +1,83 @@ +#!/usr/bin/env Rscript + +args<-commandArgs(TRUE) + +if (length(args) == 0) { + stop("Error, require params: infercnv.obj"); +} + +infercnv_obj_file = args[1] + +pdf(paste0(infercnv_obj_file, '.dropout.pdf')) + +infercnv_obj = readRDS(infercnv_obj_file) + + +library(edgeR) +library(fitdistrplus) +library(infercnv) + +# borrowing some code from splatter + +get_parameters <- function(group_name, expr.matrix) { + + params = list() + params[['group_name']] = group_name + + # estimate gamma for genes + lib.sizes <-
colSums(expr.matrix) + lib.med <- median(lib.sizes) + norm.counts <- t(t(expr.matrix) / lib.sizes * lib.med) + norm.counts <- norm.counts[rowSums(norm.counts > 0) > 1, ] + + + # estimate dropout params + mean_vs_p0_table = infercnv:::.get_mean_vs_p0_from_matrix(expr.matrix) + logistic_params = infercnv:::.get_logistic_params(mean_vs_p0_table) + + params[['dropout.logistic.midpt']] = logistic_params$midpt + params[['dropout.logistic.slope']] = logistic_params$slope + + + + mean_vs_p0_table = cbind(mean_vs_p0_table, logm=log(mean_vs_p0_table$m + 1)) + smoothScatter(mean_vs_p0_table$logm, mean_vs_p0_table$p0, main=group_name) + points(mean_vs_p0_table$logm, + infercnv:::.logistic(mean_vs_p0_table$logm, logistic_params$midpt, logistic_params$slope), col='red') + + + midpt_use = mean(mean_vs_p0_table$logm[mean_vs_p0_table$p0>0.48 & mean_vs_p0_table$p0<0.52]) + + points(mean_vs_p0_table$logm, + infercnv:::.logistic(mean_vs_p0_table$logm, midpt_use, logistic_params$slope), col='magenta') + + + s = smooth.spline(mean_vs_p0_table$logm, mean_vs_p0_table$p0) + r = range(mean_vs_p0_table$logm) + x=seq(r[1], r[2], 0.1) + points(x, predict(s, x)$y, col='orange') + + + return(params) + +} + + + + +# examine each group +all_groups = c(infercnv_obj@observation_grouped_cell_indices, infercnv_obj@reference_grouped_cell_indices) +all_groups[['combined_normal']] <- unlist(infercnv_obj@reference_grouped_cell_indices) + +for (group in names(all_groups)) { + + group_idxs = all_groups[[ group ]] + expr.data = infercnv_obj@expr.data[, group_idxs] + + params = get_parameters(group, expr.data) + params = t(as.data.frame(params)) + + print(params) + +} + diff --git a/scripts/examine_infercnv_data_params.R b/scripts/examine_infercnv_data_params.R index ddd8ccb3..a911dee3 100755 --- a/scripts/examine_infercnv_data_params.R +++ b/scripts/examine_infercnv_data_params.R @@ -6,26 +6,21 @@ if (length(args) == 0) { stop("Error, require params: infercnv.obj"); } -infercnv_file_obj = args[1] 
+infercnv_obj_file = args[1] -load(infercnv_file_obj) - - -infercnv_name_obj = grep("infercnv_obj", ls(), value=T)[1] - -print(infercnv_name_obj) - -infercnv_obj = get(infercnv_name_obj) +infercnv_obj = readRDS(infercnv_obj_file) library(edgeR) library(fitdistrplus) library(infercnv) +library(Matrix) # borrowing some code from splatter get_parameters <- function(group_name, expr.matrix) { + message(sprintf("getting params for: %s", group_name)) params = list() params[['group_name']] = group_name @@ -35,11 +30,12 @@ get_parameters <- function(group_name, expr.matrix) { norm.counts <- t(t(expr.matrix) / lib.sizes * lib.med) norm.counts <- norm.counts[rowSums(norm.counts > 0) > 1, ] + ## note, fitting the gamma is done differently in splatter... using method = "mge", gof = "CvM", and first winsorizing the data at q=0.1 means <- rowMeans(norm.counts) means.fit <- fitdistrplus::fitdist(means, "gamma", method = "mme") mean.shape = unname(means.fit$estimate["shape"]) mean.rate = unname(means.fit$estimate["rate"]) - + params[[ 'gamma.mean.shape' ]] = mean.shape params[[ 'gamma.mean.rate' ]] = mean.rate @@ -67,6 +63,7 @@ get_parameters <- function(group_name, expr.matrix) { # examine each group all_groups = c(infercnv_obj@observation_grouped_cell_indices, infercnv_obj@reference_grouped_cell_indices) +all_groups[['combined_normal']] <- unlist(infercnv_obj@reference_grouped_cell_indices) for (group in names(all_groups)) { diff --git a/scripts/examine_infercnv_data_params.just_dispersion.R b/scripts/examine_infercnv_data_params.just_dispersion.R new file mode 100755 index 00000000..2e574ad6 --- /dev/null +++ b/scripts/examine_infercnv_data_params.just_dispersion.R @@ -0,0 +1,56 @@ +#!/usr/bin/env Rscript + +args<-commandArgs(TRUE) + +if (length(args) == 0) { + stop("Error, require params: infercnv.obj"); +} + +infercnv_obj_file = args[1] + +infercnv_obj = readRDS(infercnv_obj_file) + + +library(edgeR) +library(fitdistrplus) +library(infercnv) +library(Matrix) + +# borrowing 
some code from splatter + +get_parameters <- function(group_name, expr.matrix) { + + message(sprintf("getting params for: %s", group_name)) + params = list() + params[['group_name']] = group_name + + + # estimate common dispersion + design <- matrix(1, ncol(expr.matrix), 1) + disps <- edgeR::estimateDisp(expr.matrix, design = design) + + params[[ 'common.dispersion' ]] = disps$common.dispersion + + + return(params) + +} + + + +# examine each group +all_groups = c(infercnv_obj@observation_grouped_cell_indices, infercnv_obj@reference_grouped_cell_indices) +all_groups[['combined_normal']] <- unlist(infercnv_obj@reference_grouped_cell_indices) + +for (group in names(all_groups)) { + + group_idxs = all_groups[[ group ]] + expr.data = infercnv_obj@expr.data[, group_idxs] + + params = get_parameters(group, expr.data) + params = t(as.data.frame(params)) + + print(params) + +} + diff --git a/scripts/examine_normal_cutoffs_vs_KS.R b/scripts/examine_normal_cutoffs_vs_KS.R new file mode 100755 index 00000000..9f5f41da --- /dev/null +++ b/scripts/examine_normal_cutoffs_vs_KS.R @@ -0,0 +1,82 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +parser$add_argument("--scale", help="scale", action='store_true', default=FALSE) +parser$add_argument("--subtract", help="subtract", action='store_true', default=FALSE) +parser$add_argument("--smooth", help="smooth", action='store_true', default=TRUE) +parser$add_argument("--show_tumor", help="show tumor instead of normal", action='store_true', default=FALSE) +parser$add_argument("--output", help="name of output png file", required=TRUE) + +args = parser$parse_args() + +library(infercnv) +library(tidyverse) +library(futile.logger) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +if (! 
infercnv:::has_reference_cells(infercnv_obj)) { + stop("Error, cannot tune parameters without reference 'normal' cells defined") +} + +if (args$scale) { + infercnv_obj <- infercnv:::scale_infercnv_expr(infercnv_obj) +} + +if (args$subtract) { + infercnv_obj <- subtract_ref_expr_from_obs(infercnv_obj, inv_log=FALSE) +} + + +if (args$smooth) { + infercnv_obj <- smooth_by_chromosome(infercnv_obj, window_length=101, smooth_ends=TRUE) +} + +if (args$show_tumor) { + expr_vals <- infercnv_obj@expr.data[, unlist(infercnv_obj@observation_grouped_cell_indices)] +} else { + expr_vals <- infercnv_obj@expr.data[, unlist(infercnv_obj@reference_grouped_cell_indices)] +} + + +mu = mean(expr_vals) +sigma = sd(expr_vals) + +data.want = data.frame(vals=as.numeric(expr_vals)) + +mean_delta = infercnv:::determine_mean_delta_via_Z(sigma, p=0.05) +KS_delta = infercnv:::get_HoneyBADGER_setGexpDev(gexp.sd=sigma, alpha=0.05) + + +png(args$output) + +message("plotting ncells distribution") + +message("mean delta: ", mean_delta) +message("KS_delta: ", KS_delta) + +p = data.want %>% ggplot(aes(vals)) + + geom_density(alpha=0.3) + +p = p + + stat_function(fun=dnorm, color='black', args=list('mean'=mu,'sd'=sigma)) + + +## add Z-based + +p = p + + stat_function(fun=dnorm, color='blue', args=list('mean'=mu-mean_delta,'sd'=sigma)) + + stat_function(fun=dnorm, color='blue', args=list('mean'=mu+mean_delta,'sd'=sigma)) + +## add KS-based + +p = p + + stat_function(fun=dnorm, color='magenta', args=list('mean'=mu-KS_delta,'sd'=sigma)) + + stat_function(fun=dnorm, color='magenta', args=list('mean'=mu+KS_delta,'sd'=sigma)) + +plot(p) diff --git a/scripts/examine_normal_sampling_distributions.R b/scripts/examine_normal_sampling_distributions.R new file mode 100755 index 00000000..7b0ea86f --- /dev/null +++ b/scripts/examine_normal_sampling_distributions.R @@ -0,0 +1,166 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() 
+parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(tidyverse) +library(futile.logger) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +if (! infercnv:::has_reference_cells(infercnv_obj)) { + stop("Error, cannot tune parameters without reference 'normal' cells defined") +} + +expr_vals <- infercnv_obj@expr.data +mu = mean(expr_vals) +sigma = sd(expr_vals) +nrounds = 1000 +sds = c() +ngenes = nrow(expr_vals) + +normal_samples = infercnv_obj@reference_grouped_cell_indices + +num_normal_samples = length(normal_samples) + +mean_vals_df = NULL; +z_p_val = 0.05 + +num_cells_to_empirical_sd = list() + +ncells_partitions = seq (1,100,5) +for (ncells in ncells_partitions) { + means = c() + + message(sprintf("num cells: %g", ncells)) + + cells_counted = 0; + + for(i in 1:nrounds) { + ## pick a random gene + rand.gene = sample(1:ngenes) + + ## pick a random normal cell type + rand.sample = sample(num_normal_samples) + #rand.sample=1 + + vals = sample(expr_vals[rand.gene, normal_samples[[rand.sample]] ], size=ncells, replace=T) + m_val = mean(vals) + means = c(means, m_val) + + cells_counted = cells_counted + length(vals) + + + } + my.sd = sd(means) + sds = c(sds, my.sd) + + num_cells_to_empirical_sd[[ ncells ]] = my.sd + + df = data.frame(num_cells=ncells, vals=means) + #print(df) + if(is.null(mean_vals_df)) { + mean_vals_df = df + } else { + mean_vals_df = rbind(mean_vals_df, df) + } + +} + +## fit linear model +num_cells = ncells_partitions + +write.table(data.frame(num_cells=num_cells, sds=sds), file='num_cells_vs_sds.table.dat', quote=F, sep="\t") + + +fit = lm(log(sds) ~ log(num_cells)) #note, hbadger does something similar, but not for the hmm cnv state levels + +my.spline = smooth.spline(log(num_cells), log(sds)) + +message("plotting log(sd) vs. log(num_cells)") + +plot(log(num_cells), log(sds), main='log(sd) vs. 
log(num_cells)') + +plot(num_cells, sds, main='sd vs. num_cells') + +my.spline2 = smooth.spline(num_cells, sds) + +## store mean_delta for the single gene for convenience sake +mean_delta = qnorm(p=1-z_p_val, sd=sigma, mean=0) + +normal_sd_trend = list(mu=mu, + sigma=sigma, + fit=fit, + spline=my.spline, + mean_delta=mean_delta) + + + +### do some plotting + + +for (ncells in ncells_partitions) { + + message(sprintf("plotting ncells distribution: %g", ncells)) + + data.want = mean_vals_df %>% filter(num_cells == ncells) + + + p = data.want %>% ggplot(aes(vals, fill=num_cells)) + + geom_density(alpha=0.3) + + sigma <- exp(predict(normal_sd_trend$fit, + newdata=data.frame(num_cells=ncells))[[1]]) + + message("ncells:", ncells, " sigma: ", sigma) + + p = p + + stat_function(fun=dnorm, color='black', args=list('mean'=1,'sd'=sigma)) + + ggtitle(sprintf("num_cells: %g, sd: %g", ncells, sigma)) + + p = p + + stat_function(fun=dnorm, color='magenta', args=list('mean'=1,'sd'=num_cells_to_empirical_sd[[ ncells]] )) + + + pval=0.01 + + left_mean = 1 - 2 * (1-qnorm(p=pval, mean=1, sd=sigma)) + message("left_mean: ", left_mean) + p = p + + stat_function(fun=dnorm, color='blue', args=list('mean'=left_mean,'sd'=sigma)) + + + right_mean = 1 + 2 * (qnorm(p=1-pval, mean=1, sd=sigma)-1) + message("right_mean: ", right_mean) + p = p + + stat_function(fun=dnorm, color='blue', args=list('mean'=right_mean,'sd'=sigma)) + + + + + + if (FALSE) { + + spline.sd = exp(predict(my.spline, x=log(ncells))$y) + + + p = p + + stat_function(fun=dnorm, color='green', args=list('mean'=1,'sd'=spline.sd)) + + spline2.sd = predict(my.spline2, x=ncells)$y + + message(spline2.sd) + + p = p + + stat_function(fun=dnorm, color='orange', args=list('mean'=1,'sd'=spline2.sd)) + } + + plot(p) +} + diff --git a/scripts/examine_normal_sampling_distributions.i3.R b/scripts/examine_normal_sampling_distributions.i3.R new file mode 100755 index 00000000..e9029895 --- /dev/null +++ 
b/scripts/examine_normal_sampling_distributions.i3.R @@ -0,0 +1,108 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(tidyverse) +library(futile.logger) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + + +expr_vals <- infercnv_obj@expr.data + + +sd_trend_info = infercnv:::.i3HMM_get_sd_trend_by_num_cells_fit(infercnv_obj) + + +mu = sd_trend_info$mu +sigma = sd_trend_info$sigma + +sds = c() +ngenes = nrow(expr_vals) + +tumor_samples = infercnv_obj@observation_grouped_cell_indices + +print(tumor_samples) + +num_tumor_samples = length(tumor_samples) + +print(num_tumor_samples) + +mean_vals_df = NULL; +z_p_val = 0.05 + + +num_cells_to_empirical_sd = list() + +nrounds=100 + +ncells_partitions = seq (1,100,5) +for (ncells in ncells_partitions) { + means = c() + + message(sprintf("num cells: %g", ncells)) + + cells_counted = 0; + + for(i in 1:nrounds) { + ## pick a random gene + rand.gene = sample(1:ngenes, size=1) + + ## pick a random normal cell type + rand.sample = sample(1:num_tumor_samples, size=1) + #rand.sample=1 + #print(rand.sample) + + vals = sample(expr_vals[rand.gene, tumor_samples[[rand.sample]] ], size=ncells, replace=T) + m_val = mean(vals) + means = c(means, m_val) + + cells_counted = cells_counted + length(vals) + + } + means.sd = sd(means) + means.mean = mean(means) + + num_cells_to_empirical_sd[[ ncells ]] = means.sd + + df = data.frame(num_cells=ncells, vals=means) + + message(sprintf("plotting ncells distribution: %g", ncells)) + + data.want = df + + + p = data.want %>% ggplot(aes(vals, fill=num_cells)) + + geom_density(alpha=0.3) + + ggtitle(sprintf("num_cells: %g", ncells)) + + ## draw parameterized distribution + p = p + + stat_function(fun=dnorm, color='black', args=list('mean'=means.mean,'sd'=means.sd)) 
+ + + alpha=0.05 + ks_delta = infercnv:::get_HoneyBADGER_setGexpDev(gexp.sd=sd_trend_info$sigma, k_cells=ncells, alpha=alpha, plot=T) + + left_mean = means.mean - ks_delta + message("left_mean: ", left_mean) + p = p + + stat_function(fun=dnorm, color='blue', args=list('mean'=left_mean,'sd'=means.sd)) + + + right_mean = means.mean + ks_delta + message("right_mean: ", right_mean) + p = p + + stat_function(fun=dnorm, color='blue', args=list('mean'=right_mean,'sd'=means.sd)) + + + plot(p) +} + diff --git a/scripts/examine_simulated_vs_observed_dispersion.R b/scripts/examine_simulated_vs_observed_dispersion.R new file mode 100755 index 00000000..2a6aa706 --- /dev/null +++ b/scripts/examine_simulated_vs_observed_dispersion.R @@ -0,0 +1,97 @@ +#!/usr/bin/env Rscript + +args<-commandArgs(TRUE) + +if (length(args) == 0) { + stop("Error, require params: infercnv.obj"); +} + +infercnv_obj_file = args[1] + +pdf(paste0(infercnv_obj_file, '.dispersion_estimation.pdf')) + +infercnv_obj = readRDS(infercnv_obj_file) + + +library(edgeR) +library(fitdistrplus) +library(infercnv) + +# examine each group +normal_grp_idx <- unlist(infercnv_obj@reference_grouped_cell_indices) +expr.matrix = infercnv_obj@expr.data[, normal_grp_idx] + + +## estimate dropout params +mean_vs_p0_table = infercnv:::.get_mean_vs_p0_from_matrix(expr.matrix) +logistic_params = infercnv:::.get_logistic_params(mean_vs_p0_table) + +iterations=1 +dispersion_params = c(0.01, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10) + +resultset=matrix(0, ncol=3, nrow=iterations*length(dispersion_params)) +colnames(resultset) = c('target', 'before_Zinf', 'after_Zinf') + + +row = 0 + + +for (common.dispersion in dispersion_params) { + message(sprintf("Exploring common.dispersion set at: %g", common.dispersion)) + for (iter in 1:iterations) { + message(sprintf("\titer: %d", iter)) + + row = row + 1 + + ## simulate w/o zero-inflation + sim_counts = infercnv:::.get_simulated_cell_matrix(mean_vs_p0_table$m, NULL, 100, 
common_dispersion=common.dispersion) + + ## estimate common disp from these data: + design <- matrix(1, ncol(sim_counts), 1) + + + disps <- edgeR::estimateDisp(sim_counts, design = design) + #print(sprintf("estimated disp before dropouts: %g", disps$common.dispersion)) + + resultset[row,1] <- common.dispersion + resultset[row,2] <- disps$common.dispersion + + + ## include zero-inflation + sim_counts = infercnv:::.get_simulated_cell_matrix(mean_vs_p0_table$m, mean_vs_p0_table, 100, + common_dispersion=common.dispersion) + + + disps <- edgeR::estimateDisp(sim_counts, design = design) + resultset[row,3] <- disps$common.dispersion + + } + + +} + + +resultset = as.data.frame(resultset) +print(resultset) +write.table(resultset, file=paste0(infercnv_obj_file, ".dispersion_estimation.dat"), quote=F, sep="\t") + +## examples: +## 10x: 0.221 + 1.05 * (true_dispersion) # colon single sample +## 0.223 + 1.05 * (true_dipersion) # multiple colon samples + +## smrtSeq: 0.95 + 1.56 * (true_dispersion) # oligodendro +## 1.073 + 1.628 * (true_dispersion) # melanoma + + +res.lm = lm(resultset[,3] ~ resultset[,1]) + +print(res.lm) + +coeff = res.lm$coefficients +intercept = coeff[1] +slope = coeff[2] + +plot(resultset[,1], resultset[,3], main=sprintf("y=%g + %g * x", intercept, slope), col='green') +points(resultset[,1], resultset[,2]) + + diff --git a/scripts/examine_simulated_vs_observed_dispersion.from_matrix.R b/scripts/examine_simulated_vs_observed_dispersion.from_matrix.R new file mode 100755 index 00000000..029d7deb --- /dev/null +++ b/scripts/examine_simulated_vs_observed_dispersion.from_matrix.R @@ -0,0 +1,92 @@ +#!/usr/bin/env Rscript + +args<-commandArgs(TRUE) + +if (length(args) == 0) { + stop("Error, require params: normal_cells.matrix"); +} + +matrix.file = args[1] + +pdf(paste0(matrix.file, '.dispersion_estimation.pdf')) + +library(edgeR) +library(fitdistrplus) +library(infercnv) + +expr.matrix = read.table(matrix.file) + + +## estimate dropout params +mean_vs_p0_table 
= infercnv:::.get_mean_vs_p0_from_matrix(expr.matrix) +logistic_params = infercnv:::.get_logistic_params(mean_vs_p0_table) + +iterations=1 +dispersion_params = c(0.01, 0.05, 0.1, 0.2, 0.5, 1, 2, 5, 10) + +resultset=matrix(0, ncol=3, nrow=iterations*length(dispersion_params)) +colnames(resultset) = c('target', 'before_Zinf', 'after_Zinf') + + +row = 0 + + +for (common.dispersion in dispersion_params) { + message(sprintf("Exploring common.dispersion set at: %g", common.dispersion)) + for (iter in 1:iterations) { + message(sprintf("\titer: %d", iter)) + + row = row + 1 + + ## simulate w/o zero-inflation + sim_counts = infercnv:::.get_simulated_cell_matrix(mean_vs_p0_table$m, NULL, 100, common_dispersion=common.dispersion) + + ## estimate common disp from these data: + design <- matrix(1, ncol(sim_counts), 1) + + + disps <- edgeR::estimateDisp(sim_counts, design = design) + #print(sprintf("estimated disp before dropouts: %g", disps$common.dispersion)) + + resultset[row,1] <- common.dispersion + resultset[row,2] <- disps$common.dispersion + + + ## include zero-inflation + sim_counts = infercnv:::.get_simulated_cell_matrix(mean_vs_p0_table$m, mean_vs_p0_table, 100, + common_dispersion=common.dispersion) + + + disps <- edgeR::estimateDisp(sim_counts, design = design) + resultset[row,3] <- disps$common.dispersion + + } + + +} + + +resultset = as.data.frame(resultset) +print(resultset) +write.table(resultset, file=paste0(matrix.file, ".dispersion_estimation.dat"), quote=F, sep="\t") + +## examples: +## 10x: 0.221 + 1.05 * (true_dispersion) # colon single sample +## 0.223 + 1.05 * (true_dipersion) # multiple colon samples + +## smrtSeq: 0.95 + 1.56 * (true_dispersion) # oligodendro +## 1.073 + 1.628 * (true_dispersion) # melanoma + + +res.lm = lm(resultset[,3] ~ resultset[,1]) + +print(res.lm) + +coeff = res.lm$coefficients +intercept = coeff[1] +slope = coeff[2] + +plot(resultset[,1], resultset[,3], main=sprintf("y=%g + %g * x", intercept, slope), col='green') 
+points(resultset[,1], resultset[,2]) + + diff --git a/scripts/explore_HMM_exec.R b/scripts/explore_HMM_exec.R new file mode 100755 index 00000000..e4639c23 --- /dev/null +++ b/scripts/explore_HMM_exec.R @@ -0,0 +1,177 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +parser$add_argument("--chr", help='restrict to chr', required=FALSE, nargs=1, default=NULL) +args = parser$parse_args() + +library(infercnv) +library(futile.logger) +library(HiddenMarkov) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +cnv_mean_sd=infercnv:::get_spike_dists(infercnv_obj@.hspike) +cnv_level_to_mean_sd_fit=infercnv:::get_hspike_cnv_mean_sd_trend_by_num_cells_fit(infercnv_obj@.hspike) +transition_out_p=1e-6 +p_val=0.05 +hclust_method='ward.D2' + + +flog.info(sprintf("predict_CNV_via_HMM_on_tumor_subclusters(p_val=%g)", p_val)) +HMM_info <- infercnv:::.get_HMM(cnv_mean_sd, transition_out_p) +chrs = unique(infercnv_obj@gene_order$chr) +expr.data = infercnv_obj@expr.data +gene_order = infercnv_obj@gene_order +hmm.data = expr.data +hmm.data[,] = -1 #init to invalid state + +tumor_subclusters <- unlist(infercnv_obj@tumor_subclusters[["subclusters"]], recursive=F) +if (is.null(tumor_subclusters)) { + message("No subclusters defined, running per-sample instead") + tumor_subclusters <- infercnv_obj@observation_grouped_cell_indices +} + +if (! is.null(args$chr)) { + chrs = c(args$chr) +} + + +########################################## +#chrs = c('chr1') +########################################## + + +############################################## +## From HiddenMarkovPackage +getj <- function (x, j) { + if (is.null(x)) + return(NULL) + n <- length(x) + for (i in 1:n) x[[i]] <- x[[i]][j] + return(x) +} + + +local.Viterbi.dthmm <- function (object, ...) 
{ + x <- object$x + dfunc <- HiddenMarkov:::makedensity(object$distn) + n <- length(x) + m <- nrow(object$Pi) # transition matrix + nu <- matrix(NA, nrow = n, ncol = m) # scoring matrix + y <- rep(NA, n) # final trace + pseudocount = 1e-20 + + object$pm$sd = max(object$pm$sd) + + emissions <- matrix(NA, nrow = n, ncol = m) + emissions_pre <- emissions + + ## init first row + emission <- pnorm(abs(x[1]-object$pm$mean)/object$pm$sd, log=T, lower.tail=F) + #emissions_pre[1,] <- emission + emissions_pre[1,] <- abs(x[1]-object$pm$mean)/object$pm$sd + + emission <- 1 / (-1 * emission) + emission <- emission / sum(emission) + + emissions[1,] <- log(emission) + + nu[1, ] <- log(object$delta) + # start probabilities + emissions[1,] + + logPi <- log(object$Pi) # convert transition matrix to log(p) + + for (i in 2:n) { + + matrixnu <- matrix(nu[i - 1, ], nrow = m, ncol = m) + + #nu[i, ] <- apply(matrixnu + logPi, 2, max) + + # dfunc(x=x[i], object$pm, getj(object$pn, i), + # log=TRUE) + + + #emission <- dfunc(x=x[i], object$pm, getj(object$pn, i), log=T) + ## normalize emission p-values + ## first add pseudcounts + #missions[i, ] <- emissions[i, ] + pseudocount + #emissions[i, ] <- emissions[i, ] / sum(emissions[i, ]) + + #emissions[i, ] <- log(emissions[i, ]) + + + emission <- pnorm(abs(x[i]-object$pm$mean)/object$pm$sd, log=T, lower.tail=F) + #emissions_pre[i,] <- emission + emissions_pre[i,] <- abs(x[i]-object$pm$mean)/object$pm$sd + + emission <- 1 / (-1 * emission) + emission <- emission / sum(emission) + + emissions[i, ] <- log(emission) + + nu[i, ] <- apply(matrixnu + logPi, 2, max) + emissions[i, ] + + #print(matrixnu) + #print(logPi) + } + if (any(nu[n, ] == -Inf)) + stop("Problems With Underflow") + + write.table(nu, file='nu.txt', quote=F, sep="\t") + write.table(emissions, file='emissions.txt', quote=F, sep="\t") + write.table(emissions_pre, file='emissions_pre.txt', quote=F, sep="\t") + + ## traceback + y[n] <- which.max(nu[n, ]) + + for (i in seq(n - 1, 1, -1)) 
+ y[i] <- which.max(logPi[, y[i + 1]] + nu[i, ]) + + return(y) +} + + +########################################## + + +for (chr in chrs) { + print(chr) + chr_gene_idx = which(gene_order$chr == chr) + + ## run through each cell for this chromosome: + for (tumor_subcluster_name in names(tumor_subclusters)) { + print(tumor_subcluster_name) + tumor_subcluster_cells_idx <- tumor_subclusters[[tumor_subcluster_name]] + + gene_expr_vals = rowMeans(expr.data[chr_gene_idx,tumor_subcluster_cells_idx,drop=F]) + ##gene_expr_vals = apply(expr.data[chr_gene_idx,tumor_subcluster_cells_idx,drop=F], 1, median) + if (length(gene_expr_vals) < 2) { next; } + num_cells = length(tumor_subcluster_cells_idx) + + state_emission_params <- infercnv:::.get_state_emission_params(num_cells, cnv_mean_sd, cnv_level_to_mean_sd_fit) + print(state_emission_params) + print(gene_expr_vals) + + hmm <- HiddenMarkov::dthmm(gene_expr_vals, + HMM_info[['state_transitions']], + HMM_info[['delta']], + "norm", + state_emission_params) + + hmm_trace <- local.Viterbi.dthmm(hmm) + + print(hmm_trace) + + hmm.data[chr_gene_idx,tumor_subcluster_cells_idx] <- hmm_trace + + break + } +} + + + + diff --git a/scripts/explore_HMM_exec.hspike.R b/scripts/explore_HMM_exec.hspike.R new file mode 100755 index 00000000..cff48b43 --- /dev/null +++ b/scripts/explore_HMM_exec.hspike.R @@ -0,0 +1,161 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(futile.logger) +library(HiddenMarkov) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +cnv_mean_sd=infercnv:::get_spike_dists(infercnv_obj@.hspike) +cnv_level_to_mean_sd_fit=infercnv:::get_hspike_cnv_mean_sd_trend_by_num_cells_fit(infercnv_obj@.hspike) +transition_out_p=1e-6 +p_val=0.05 +hclust_method='ward.D2' + + 
+flog.info(sprintf("predict_CNV_via_HMM_on_tumor_subclusters(p_val=%g)", p_val)) +HMM_info <- infercnv:::.get_HMM(cnv_mean_sd, transition_out_p) + +infercnv_obj = infercnv_obj@.hspike + +chrs = unique(infercnv_obj@gene_order$chr) +expr.data = infercnv_obj@expr.data +gene_order = infercnv_obj@gene_order +hmm.data = expr.data +hmm.data[,] = -1 #init to invalid state + +tumor_subclusters <- c(infercnv_obj@observation_grouped_cell_indices, infercnv_obj@reference_grouped_cell_indices) + + +########################################## +#chrs = c('chr1') +########################################## + + +############################################## +## From HiddenMarkovPackage +getj <- function (x, j) { + if (is.null(x)) + return(NULL) + n <- length(x) + for (i in 1:n) x[[i]] <- x[[i]][j] + return(x) +} + + +local.Viterbi.dthmm <- function (object, ...){ + x <- object$x + dfunc <- HiddenMarkov:::makedensity(object$distn) + n <- length(x) + m <- nrow(object$Pi) # transition matrix + nu <- matrix(NA, nrow = n, ncol = m) # scoring matrix + y <- rep(NA, n) # final trace + pseudocount = 1e-20 + + emissions <- matrix(NA, nrow = n, ncol = m) + + ## init first row + emission <- pnorm(abs(x[1]-object$pm$mean)/object$pm$sd, log=T, lower.tail=F) + emission <- 1 / (-1 * emission) + emission <- emission / sum(emission) + + emissions[1,] <- log(emission) + + nu[1, ] <- log(object$delta) + # start probabilities + emissions[1,] + + logPi <- log(object$Pi) # convert transition matrix to log(p) + + for (i in 2:n) { + + matrixnu <- matrix(nu[i - 1, ], nrow = m, ncol = m) + + #nu[i, ] <- apply(matrixnu + logPi, 2, max) + + # dfunc(x=x[i], object$pm, getj(object$pn, i), + # log=TRUE) + + + #emission <- dfunc(x=x[i], object$pm, getj(object$pn, i), log=T) + ## normalize emission p-values + ## first add pseudcounts + #missions[i, ] <- emissions[i, ] + pseudocount + #emissions[i, ] <- emissions[i, ] / sum(emissions[i, ]) + + #emissions[i, ] <- log(emissions[i, ]) + + + emission <- 
pnorm(abs(x[i]-object$pm$mean)/object$pm$sd, log=T, lower.tail=F) + emission <- 1 / (-1 * emission) + emission <- emission / sum(emission) + + emissions[i, ] <- log(emission) + + nu[i, ] <- apply(matrixnu + logPi, 2, max) + emissions[i, ] + + #print(matrixnu) + #print(logPi) + } + if (any(nu[n, ] == -Inf)) + stop("Problems With Underflow") + + write.table(nu, file='nu.txt', quote=F, sep="\t") + write.table(emissions, file='emissions.txt', quote=F, sep="\t") + + ## traceback + y[n] <- which.max(nu[n, ]) + + for (i in seq(n - 1, 1, -1)) + y[i] <- which.max(logPi[, y[i + 1]] + nu[i, ]) + + return(y) +} + + +########################################## + +#chrs = c("chr13") +for (chr in chrs) { + print(chr) + chr_gene_idx = which(gene_order$chr == chr) + + ## run through each cell for this chromosome: + for (tumor_subcluster_name in names(tumor_subclusters)) { + print(tumor_subcluster_name) + tumor_subcluster_cells_idx <- tumor_subclusters[[tumor_subcluster_name]] + + gene_expr_vals = rowMeans(expr.data[chr_gene_idx,tumor_subcluster_cells_idx,drop=F]) + ##gene_expr_vals = apply(expr.data[chr_gene_idx,tumor_subcluster_cells_idx,drop=F], 1, median) + + num_cells = length(tumor_subcluster_cells_idx) + + state_emission_params <- infercnv:::.get_state_emission_params(num_cells, cnv_mean_sd, cnv_level_to_mean_sd_fit) + print(state_emission_params) + print(gene_expr_vals) + + hmm <- HiddenMarkov::dthmm(gene_expr_vals, + HMM_info[['state_transitions']], + HMM_info[['delta']], + "norm", + state_emission_params) + + hmm_trace <- local.Viterbi.dthmm(hmm) + + print(hmm_trace) + + hmm.data[chr_gene_idx,tumor_subcluster_cells_idx] <- hmm_trace + + + } +} + + + + diff --git a/scripts/genome_smoothed_lineplots.R b/scripts/genome_smoothed_lineplots.R new file mode 100755 index 00000000..1edf703e --- /dev/null +++ b/scripts/genome_smoothed_lineplots.R @@ -0,0 +1,96 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() 
+parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(futile.logger) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +pdf(sprintf("%s.chr_lineplots.pdf", infercnv_obj_file)) + +normal_groups = infercnv_obj@reference_grouped_cell_indices +tumor_groups = infercnv_obj@observation_grouped_cell_indices + +expr.data = infercnv_obj@expr.data + +num_tumor_groups = length(tumor_groups) + +windowsizes = c(25,50,75,100) +num_windowsizes = length(windowsizes) +par(mfrow=c(num_windowsizes, 1)) + +library(tidyverse) + + +plotme <- function(normal_pts, tumor_pts, windowsize) { + + all_pts = c(normal_pts, tumor_pts) + + all_pts_names = names(all_pts) + + my.colors = rainbow(length(all_pts)) + + yrange = range(unlist(all_pts)) + + text.adj = 0.7 + for (i in 1:length(all_pts)) { + if (i == 1) { + plot(all_pts[[i]], t='l', col=my.colors[i], main=sprintf("windowsize: %g, tumor: %s", windowsize, all_pts_names[length(all_pts_names)]), ylim=yrange, + cex.lab=text.adj, cex.main=text.adj, cex.axis=text.adj) + } else { + points(all_pts[[i]], t='l', col=my.colors[i]) + } + } + abline(h=0) + legend('top', legend=all_pts_names, col=my.colors, pch=1, horiz=T, bty='n', cex=text.adj) + +} + + + +get_smoothed <- function(cell_idx, windowsize) { + group_expr_data = expr.data[, cell_idx] + smoothed = apply(group_expr_data, 2, caTools::runmean, k=windowsize) + smoothed_mean = rowMeans(smoothed) + + ## center it: + smoothed_mean = smoothed_mean - median(smoothed_mean) + + return(smoothed_mean) +} + +plot_chr_smooths <- function(tumor_type) { + + + tumor_pts = tumor_groups[[tumor_type]] + + + for (windowsize in windowsizes) { + message(sprintf("\t-plotting %s", tumor_type)) + + normal_pts = list() + for (normal_type in names(normal_groups)) { + normal_pts[[ normal_type ]] <- get_smoothed(normal_groups[[normal_type]], windowsize) + } + + tumor_pts = 
list() + tumor_pts[[ tumor_type ]] = get_smoothed(tumor_groups[[tumor_type]], windowsize) + plotme(normal_pts, tumor_pts, windowsize) + } +} + + + + +for (tumor_type in names(tumor_groups)) { + message(sprintf("plotting for %s", tumor_type)) + plot_chr_smooths(tumor_type) +} diff --git a/scripts/inferCNV.R b/scripts/inferCNV.R index a737c1cc..2b0095e1 100755 --- a/scripts/inferCNV.R +++ b/scripts/inferCNV.R @@ -1,15 +1,25 @@ #!/usr/bin/env Rscript +# To use inferCNV via command-line interface, first install inferCNV per Wiki, +# then run a command like the following: +# +# ./inferCNV.R \ +# --raw_counts_matrix="../example/oligodendroglioma_expression_downsampled.counts.matrix" \ +# --annotations_file="../example/oligodendroglioma_annotations_downsampled.txt" \ +# --gene_order_file="../example/gencode_downsampled.EXAMPLE_ONLY_DONT_REUSE.txt" \ +# --ref_group_names="Microglia/Macrophage,Oligodendrocytes (non-malignant)" \ +# --cutoff=1 \ +# --out_dir="output_cli" \ +# --cluster_by_groups \ +# --denoise +# --median_filter # Load libraries -library(ape) -library("RColorBrewer", character.only=TRUE) -library(GMD) library(optparse) library(logging) -if (!require('fastcluster')) { - warning("fastcluster library not available, using the default hclust method instead.") -} +#if (!require('fastcluster')) { +# warning("fastcluster library not available, using the default hclust method instead.") +#} library(infercnv) # Logging level choices @@ -83,14 +93,14 @@ check_arguments <- function(arguments){ stop("error, must specify acceptable --hclust_method") } - # Warn that an average of the samples is used in the absence of - # normal / reference samples - if (is.null(arguments$reference_observations)){ - logging::logwarn(paste(":: --reference_observations: No reference ", - "samples were given, the average of the samples ", - "will be used.", - sep="")) - } + # # Warn that an average of the samples is used in the absence of + # # normal / reference samples + # if 
(is.null(arguments$reference_observations)){ + # logging::logwarn(paste(":: --reference_observations: No reference ", + # "samples were given, the average of the samples ", + # "will be used.", + # sep="")) + # } # Make sure the threshold is centered. arguments$max_centered_expression <- abs(arguments$max_centered_expression) @@ -112,46 +122,9 @@ check_arguments <- function(arguments){ # if (! is.na(suppressWarnings(as.integer(arguments$name_ref_groups)))){ # arguments$name_ref_groups <- list(as.integer(arguments$name_ref_groups)) # } else { - if (! is.na(arguments$name_ref_groups)) { - # Warn references must be given. - if (is.null(arguments$reference_observations)){ - logging::logerror(paste(":: --ref_groups to use this function ", - "references must be given. ")) - stop(979) - } - # TODO need to check and make sure all reference indices are given. + if (! is.na(arguments$name_ref_groups)) { arguments$name_ref_groups <- unlist(strsplit(arguments$name_ref_groups,",")) - # if (length(num_str) == 1){ - # logging::logerror(paste(":: --ref_groups. If explicitly giving ", - # "indices, make sure to give atleast ", - # "two groups", sep ="")) - # stop(990) - # } - - # name_ref_groups <- list() - # for (name_token in name_str){ - # # token_numbers <- unlist(strsplit(num_token, ":")) - # # number_count <- length(token_numbers) - # if (number_count == 1){ - # singleton <- as.integer(number_count) - # name_ref_groups[[length(name_ref_groups) + 1]] <- singleton - # } else if (number_count == 2){ - # from <- as.integer(token_numbers[1]) - # to <- as.integer(token_numbers[2]) - # name_ref_groups[[length(name_ref_groups) + 1]] <- seq(from, to) - # } else { - # logging::logerror(paste(":: --ref_groups is expecting either ", - # "one number or a comma delimited list ", - # "of numbers or spans using ':'. 
", - # "Examples include: --ref_groups 3 or ", - # " --ref_groups 1,3,5,6,3 or ", - # " --ref_groups 1:5,6:20 or ", - # " --ref_groups 1,2:5,6,7:10 .", sep="")) - # stop(999) - # } - # } - # arguments$name_ref_groups <- name_ref_groups } else { if(!is.null(arguments$num_ref_groups)) { @@ -164,354 +137,552 @@ check_arguments <- function(arguments){ return(arguments) } - # Command line arguments pargs <- optparse::OptionParser(usage=paste("%prog [options]", "--output_dir directory", "data_matrix genomic_positions")) -pargs <- optparse::add_option(pargs, c("--color_safe"), - type="logical", - default=FALSE, - action="store_true", - dest="use_color_safe", - metavar="Color_Safe", - help=paste("To support the needs of those who see ", - "colors differently, use this option to", - "change the colors to a palette visibly ", - "distinct to all color blindness. ", - " [Default %default]")) - -pargs <- optparse::add_option(pargs, c("--contig_lab_size"), - type="integer", - action="store", - default=1, - dest="contig_label_size", - metavar="Contig_Label_Size", - help=paste("Used to increase or decrease the text labels", - "for the X axis (contig names).", - "[Default %default]")) - pargs <- optparse::add_option(pargs, c("--cutoff"), type="numeric", - default=0, + default=1, action="store", dest="cutoff", metavar="Cutoff", - help=paste("A number >= 0 is expected. A cut off for", - "the average expression of genes to be used", - "for CNV inference (use the value before log2 transformation). [Default %default]")) + help=paste("A number >= 0 is expected. Cut-off for the min", + "average read counts per gene among reference cells.", + "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--transform"), - type="logical", - default=FALSE, - action="store_true", - dest="log_transform", - metavar="LogTransform", - help=paste("Matrix is assumed to be Log2(TPM+1) ", - "transformed. If instead it is raw TPMs ", - "use this flag so that the data will be ", - "transformed. 
[Default %default]")) +pargs <- optparse::add_option(pargs, c("--min_cells_per_gene"), + type="numeric", + default=3, + action="store", + dest="min_cells_per_gene", + metavar="Minimum cells per gene", + help=paste("minimum number of reference cells requiring ", + "expression measurements to include the ", + "corresponding gene. [Default %default]")) -pargs <- optparse::add_option(pargs, c("--log_file"), +pargs <- optparse::add_option(pargs, c("--out_dir"), type="character", + default=".", action="store", - default=NA, - dest="log_file", - metavar="Log", - help=paste("File for logging. If not given,", - "logging will occur to console.", + dest="out_dir", + metavar="Output Directory", + help=paste("Path to directory to deposit outputs. ", "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--delim"), - type="character", +pargs <- optparse::add_option(pargs, c("--window_length"), + type="numeric", + default=101, action="store", - default="\t", - dest="delim", - metavar="Delimiter", - help=paste("Delimiter for reading expression matrix", - " and writing matrices output.", + dest="window_length", + metavar="Window Length", + help=paste("Length of the window for the moving average ", + "(smoothing). Should be an odd integer.", "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--log_level"), +pargs <- optparse::add_option(pargs, c("--smooth_method"), type="character", + default="pyramidinal", action="store", - default="INFO", - dest="log_level", - metavar="LogLevel", - help=paste("Logging level. 
Valid choices are", - paste(C_LEVEL_CHOICES,collapse=", "), + dest="smooth_method", + metavar="Smoothing Method", + help=paste("Method to use for smoothing: c(runmeans,pyramidinal)", "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--noise_filter"), +pargs <- optparse::add_option(pargs, c("--num_ref_groups"), type="numeric", - default=0, - action="store", - dest="magnitude_filter", - metavar="Magnitude_Filter", - help=paste("A value must be atleast this much more or", - "less than the reference to be plotted", - "[Default %default].")) - -pargs <- optparse::add_option(pargs, c("--max_centered_expression"), - type="integer", - default=3, + default=NULL, action="store", - dest="max_centered_expression", - metavar="Max_centered_expression", - help=paste("This value and -1 * this value are used", - "as the maximum value expression that can", - "exist after centering data. If a value is", - "outside of this range, it is truncated to", - "be within this range [Default %default].")) - -pargs <- optparse::add_option(pargs, c("--obs_groups"), - type="character", + dest="num_ref_groups", + metavar="Number of reference groups", + help=paste("The number of reference groups or a list of", + "indices for each group of reference indices in", + "relation to reference_obs. ", + "[Default %default]")) + +pargs <- optparse::add_option(pargs, c("--ref_subtract_use_mean_bounds"), + type="logical", + default=TRUE, + action="store_false", + dest="ref_subtract_use_mean_bounds", + metavar="Reference Subtract use Mean Bounds", + help=paste("Determine means separately for each ref group, ", + "then remove intensities within bounds of means", + "[Default %default]", + "Otherwise, uses mean of the means across groups.")) + +pargs <- optparse::add_option(pargs, c("--cluster_by_groups"), + type="logical", + default=FALSE, + action="store_true", + dest="cluster_by_groups", + metavar="Cluster by Groups", + help=paste("If observations are defined according to groups ", + "(ie. 
patients), each group of cells will be ", + "clustered separately. ([Default %default]", + ", instead will use k_obs_groups setting)")) + +pargs <- optparse::add_option(pargs, c("--k_obs_groups"), + type="numeric", default=1, action="store", - dest="num_obs_groups", - metavar="Number_of_observation_groups", - help=paste("Number of groups in which to break ", - "the observations.", + dest="k_obs_groups", + metavar="K number of Observation groups", + help=paste("Number of groups in which to break the observations.", "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--output_dir"), +pargs <- optparse::add_option(pargs, c("--hclust_method"), type="character", + default="ward.D2", action="store", - dest="output_dir", - metavar="Output_Directory", - help=paste("Output directory for analysis products.", - "[Default %default][REQUIRED]")) + dest="hclust_method", + metavar="Hierarchical Clustering Method", + help=paste("Method used for hierarchical clustering of cells. ", + "Valid choices are: \"ward.D\", \"ward.D2\", \"single\"", + ", \"complete\", \"average\", \"mcquitty\", \"median\", \"centroid\". 
", + "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--ref"), +pargs <- optparse::add_option(pargs, c("--analysis_mode"), type="character", - default=NULL, + default="samples", action="store", - dest="reference_observations", - metavar="Input_reference_observations", - help=paste("Tab delimited characters are expected.", - "Names of the subset each sample ( data's", - "columns ) is part of.", + dest="analysis_mode", + metavar="Analysis Mode", + help=paste("options(samples|subclusters|cells), ", + "Grouping level for image filtering or HMM predictions.", + "[Default %default] (fastest, but subclusters is ideal)")) + +pargs <- optparse::add_option(pargs, c("--max_centered_threshold"), + type="numeric", + default=3, + action="store", + dest="max_centered_threshold", + metavar="Max Centered Threshold", + help=paste("The maximum value a value can have after", + "centering. Also sets a lower bound of -1 * this value. ", "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--num_ref_groups"), - type="integer", - default=NULL, +pargs <- optparse::add_option(pargs, c("--scale_data"), + type="logical", + default=FALSE, + action="store_true", + dest="scale_data", + metavar="Scale Data", + help=paste("perform Z-scaling of logtransformed data ", + "[Default %default]. ", + "This may be turned on if you have very different ", + "kinds of data for your normal and tumor samples. ", + "For example, you need to use GTEx representative ", + "normal expression profiles rather than being able ", + "to leverage normal single cell data that ", + "goes with your experiment.")) + +pargs <- optparse::add_option(pargs, c("--HMM"), + type="logical", + default=FALSE, + action="store_true", + dest="HMM", + metavar="HMM", + help=paste("when set to True, runs HMM to predict CNV level. 
", + "[Default %default]")) + +pargs <- optparse::add_option(pargs, c("--HMM_transition_prob"), + type="numeric", + default=1e-6, action="store", - dest="num_ref_groups", - metavar="Number_of_reference_groups", - help=paste("Define a number of groups to", - "make automatically by unsupervised", - "clustering. This ignores annotations", - "within references, but does not", - "mix them with observations.", + dest="HMM_transition_prob", + metavar="HMM Transition Probabiltie", + help=paste("transition probability in HMM", + "[Default %default]")) + +pargs <- optparse::add_option(pargs, c("--tumor_subcluster_pval"), + type="numeric", + default=0.01, + action="store", + dest="tumor_subcluster_pval", + metavar="Tumor Subcluster p-value", + help=paste("Max p-value for defining a significant tumor subcluster. ", "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--ref_groups"), +pargs <- optparse::add_option(pargs, c("--HMM_report_by"), + type="character", + default="subcluster", + action="store", + dest="HMM_report_by", + metavar="HMM report by", + help=paste("c(cell, consensus, subcluster)", + "[Default %default]", + "Note, reporting is performed entirely", + " separately from the HMM prediction. ", + "So, you can predict on subclusters, but ", + "get per-cell level reporting (more voluminous output).")) + +pargs <- optparse::add_option(pargs, c("--HMM_type"), type="character", default=NULL, action="store", - dest="name_ref_groups", - metavar="Name_of_reference_groups", - help=paste("Names of groups from --ref table whose cells", - "are to be used as reference groups.", - "[REQUIRED]")) + dest="HMM_type", + metavar="HMM type", + help=paste("HMM model type. 
Options: (i6 or i3):", + "i6: infercnv 6-state model (0, 0.5, 1,", + " 1.5, 2, >2) where state emissions are ", + "calibrated based on simulated CNV levels.\n", + "i3: infercnv 3-state model (del, neutral, amp) ", + "configured based on normal cells and HMM_i3_z_pval.\n", + "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--ref_subtract_method"), - type="character", - default="by_mean", +# pargs <- optparse::add_option(pargs, c("--HMM_i3_z_pval"), +# type="numeric", +# default=0.05, +# action="store", +# dest="HMM_i3_z_pval", +# metavar="HMM i3 z p-value", +# help=paste("p-value for HMM i3 state overlap", +# "[Default %default]")) + +pargs <- optparse::add_option(pargs, c("--denoise"), + type="logical", + default=FALSE, + action="store_true", + dest="denoise", + metavar="Denoise", + help=paste("If True, turns on denoising according to options below", + "[Default %default]")) + +pargs <- optparse::add_option(pargs, c("--noise_filter"), + type="numeric", + default=NA, + action="store", + dest="noise_filter", + metavar="Noise Filter", + help=paste("Values +- from the reference cell mean will ", + "be set to zero (whitening effect)", + "[Default %default, instead will use ", + "sd_amplifier below.]")) + +pargs <- optparse::add_option(pargs, c("--sd_amplifier"), + type="numeric", + default=1.0, action="store", - dest="ref_subtract_method", - metavar="Reference_Subtraction_Method", - help=paste("Method used to subtract the reference values from the observations. 
Valid choices are: ", - paste(C_REF_SUBTRACT_METHODS, collapse=", "), - " [Default %default]")) + dest="sd_amplifier", + metavar="SD denoise amplifier", + help=paste("Noise is defined as mean(reference_cells) ", + "+- sdev(reference_cells) * sd_amplifier ", + "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--hclust_method"), +pargs <- optparse::add_option(pargs, c("--noise_logistic"), + type="logical", + default=TRUE, + action="store_false", + dest="noise_logistic", + metavar="Noise Logistic", + help=paste("use the noise_filter or sd_amplifier ", + "based threshold (whichever is invoked) ", + "as the midpoint in alogistic model for ", + "downscaling values close to the mean. ", + "[Default %default]")) + +pargs <- optparse::add_option(pargs, c("--outlier_method_bound"), type="character", - default="complete", + default="average_bound", action="store", - dest="hclust_method", - metavar="Hierarchical_Clustering_Method", - help=paste("Method used for hierarchical clustering of cells. Valid choices are: ", - paste(C_HCLUST_METHODS, collapse=", "), - " [Default %default]")) + dest="outlier_method_bound", + metavar="Outlier Method Bound", + help=paste("Method to use for bounding outlier values. 
", + "[Default %default]", + "Will preferentially use outlier_lower_bound ", + "and outlier_upper_bound if set.")) + +pargs <- optparse::add_option(pargs, c("--outlier_lower_bound"), + type="numeric", + default=NA, + action="store", + dest="outlier_lower_bound", + metavar="Outlier Lower Bound", + help=paste("Outliers below this lower bound ", + "will be set to this value.", + "[Default %default]")) +pargs <- optparse::add_option(pargs, c("--outlier_upper_bound"), + type="numeric", + default=NA, + action="store", + dest="outlier_upper_bound", + metavar="Outlier Upper Bound", + help=paste("Outliers above this upper bound ", + "will be set to this value.", + "[Default %default]")) +pargs <- optparse::add_option(pargs, c("--plot_steps"), + type="logical", + default=FALSE, + action="store_true", + dest="plot_steps", + metavar="Plot Steps", + help=paste("If true, saves infercnv objects and ", + "plots data at the intermediate steps.", + "[Default %default]")) -pargs <- optparse::add_option(pargs,c("--obs_cluster_contig"), +pargs <- optparse::add_option(pargs, c("--final_scale_limits"), type="character", default=NULL, action="store", - dest="clustering_contig", - metavar="Clustering_Contig", - help=paste("When clustering observation samples, ", - "all genomic locations are used unless ", - "this option is given. The expected value ", - "is one of the contigs (Chr) in the genomic ", - "positions file (case senstive). All genomic ", - "positions will be plotted but only the given ", - "contig will be used in clustering / group ", - "creation.")) - -pargs <- optparse::add_option(pargs, c("--steps"), + dest="final_scale_limits", + metavar="Final Scale Limits", + help=paste("The scale limits for the final heatmap ", + "output by the run() method. 
", + "[Default %default] ", + " Alt, c(low,high)")) + +pargs <- optparse::add_option(pargs, c("--final_center_val"), + type="numeric", + default=NULL, + action="store", + dest="final_center_val", + metavar="Final Center Value", + help=paste("Center value for final heatmap output ", + "by the run() method.", + "[Default %default]")) + +pargs <- optparse::add_option(pargs, c("--debug"), type="logical", default=FALSE, action="store_true", - dest="plot_steps", - metavar="plot_steps", - help=paste("Using this argument turns on plotting ", - "intemediate steps. The plots will occur ", - "in the same directory as the output pdf. ", - "Please note this option increases the time", - " needed to run [Default %default]")) - -pargs <- optparse::add_option(pargs, c("--vis_bound_method"), - type="character", - default="average_bound", + dest="debug", + metavar="Debug", + help=paste("If true, output debug level logging.", + "[Default %default]")) + +pargs <- optparse::add_option(pargs, c("--num_threads"), + type="numeric", + default=4, action="store", - dest="bound_method_vis", - metavar="Outlier_Removal_Method_Vis", - help=paste("Method to automatically detect and bound", - "outliers. Used for visualizing. If both", - "this argument and ", - "--vis_bound_threshold are given, this will", - "not be used. Valid choices are", - paste(C_VIS_OUTLIER_CHOICES, collapse=", "), - " [Default %default]")) - -pargs <- optparse::add_option(pargs, c("--vis_bound_threshold"), + dest="num_threads", + metavar="Number of Threads", + help=paste("(int) number of threads for parallel steps. ", + "[Default %default]")) + +pargs <- optparse::add_option(pargs, c("--raw_counts_matrix"), type="character", - default=NA, + default=NULL, action="store", - dest="bound_threshold_vis", - metavar="Outlier_Removal_Threshold_Vis", - help=paste("Used as upper and lower bounds for values", - "in the visualization. If a value is", - "outside this bound it will be replaced by", - "the closest bound. 
Should be given in", - "the form of 1,1 (upper bound, lower bound)", + dest="raw_counts_matrix", + metavar="Raw Counts Expression Data", + help=paste("the matrix of genes (rows) vs. cells (columns) ", + "containing the raw counts. It'll be read via read.table()", "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--window"), - type="integer", - default=101, +pargs <- optparse::add_option(pargs, c("--gene_order_file"), + type="character", + default=NULL, action="store", - dest="window_length", - metavar="Window_Lengh", - help=paste("Window length for the smoothing.", + dest="gene_order_file", + metavar="Gene Order File", + help=paste("data file containing the positions of ", + "each gene along each chromosome in the genome. ", "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--tail"), - type="integer", - default=NA, +pargs <- optparse::add_option(pargs, c("--annotations_file"), + type="character", + default=NULL, action="store", - dest="contig_tail", - metavar="contig_tail", - help=paste("Contig tail to be removed.", + dest="annotations_file", + metavar="Annotation File", + help=paste("a description of the cells, indicating ", + "the cell type classifications. 
", "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--title"), +pargs <- optparse::add_option(pargs, c("--ref_group_names"), type="character", - default="Copy Number Variation Inference", + default=NULL, action="store", - dest="fig_main", - metavar="Figure_Title", - help=paste("Title of the figure.", + dest="ref_group_names", + metavar="Reference Groups Names", + help=paste("Names of groups from raw_counts_matrix whose cells", + "are to be used as reference groups.", "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--title_obs"), +pargs <- optparse::add_option(pargs, c("--delim"), type="character", - default="Observations (Cells)", action="store", - dest="obs_main", - metavar="Observations_Title", - help=paste("Title of the observations matrix Y-axis.", + default="\t", + dest="delim", + metavar="Delimiter", + help=paste("Delimiter for reading expression matrix", + " and writing matrices output.", "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--title_ref"), +pargs <- optparse::add_option(pargs, c("--max_cells_per_group"), + type="numeric", + default=NULL, + action="store", + dest="max_cells_per_group", + metavar="Max Cells per group", + help=paste("maximun number of cells to use per group. ", + "[Default %default] using all cells defined ", + "in the annotations_file. This option is useful ", + "for randomly subsetting the existing data ", + "for a quicker preview run, such as using ", + "50 cells per group instead of hundreds.")) + +pargs <- optparse::add_option(pargs, c("--log_file"), type="character", - default="References (Cells)", action="store", - dest="ref_main", - metavar="References_Title", - help=paste("Title of the references matrix Y-axis (if used).", + default=NA, + dest="log_file", + metavar="Log", + help=paste("File for logging. 
If not given,", + "logging will occur to console.", "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--save"), +#pargs <- optparse::add_option(pargs, c("--contig_lab_size"), +# type="integer", +# action="store", +# default=1, +# dest="contig_label_size", +# metavar="Contig_Label_Size", +# help=paste("Used to increase or decrease the text labels", +# "for the X axis (contig names).", +# "[Default %default]")) + +#pargs <- optparse::add_option(pargs, c("--color_safe"), +# type="logical", +# default=FALSE, +# action="store_true", +# dest="use_color_safe", +# metavar="Color_Safe", +# help=paste("To support the needs of those who see ", +# "colors differently, use this option to", +# "change the colors to a palette visibly ", +# "distinct to all color blindness. ", +# " [Default %default]")) + +#pargs <- optparse::add_option(pargs, c("--title"), +# type="character", +# default="Copy Number Variation Inference", +# action="store", +# dest="fig_main", +# metavar="Figure_Title", +# help=paste("Title of the figure.", +# "[Default %default]")) + +#pargs <- optparse::add_option(pargs, c("--title_obs"), +# type="character", +# default="Observations (Cells)", +# action="store", +# dest="obs_main", +# metavar="Observations_Title", +# help=paste("Title of the observations matrix Y-axis.", +# "[Default %default]")) + +#pargs <- optparse::add_option(pargs, c("--title_ref"), +# type="character", +# default="References (Cells)", +# action="store", +# dest="ref_main", +# metavar="References_Title", +# help=paste("Title of the references matrix Y-axis (if used).", +# "[Default %default]")) + +#pargs <- optparse::add_option(pargs, c("--ngchm"), +# type="logical", +# action="store_true", +# default=FALSE, +# dest="ngchm", +# metavar="NextGen_HeatMap", +# help=paste("Create a Next Generation Clustered Heat Map")) + +#pargs <- optparse::add_option(pargs, c("--path_to_shaidyMapGen"), +# type="character", +# action="store", +# default=NULL, +# dest="path_to_shaidyMapGen", +# 
metavar="Path_To_ShaidyMapGenp", +# help=paste("This is the pathway to the java application ShaidyMapGen.jar.", +# "If this is not assigned, then an enviornmental variable that ", +# "contains the ")) + +#pargs <- optparse::add_option(pargs, c("--gene_symbol"), +# type="character", +# action="store", +# default=NULL, +# dest="gene_symbol", +# metavar="Gene_Symbol", +# help=paste("The labeling type used to represent the genes in the expression", +# "data. This needs to be passed in order to add linkouts to the ", +# "genes. Possible gene label types to choose from are specified on", +# "the broadinstitute/inferCNV wiki and bmbroom/NGCHM-config-biobase.")) + +pargs <- optparse::add_option(pargs, c("--no_plot"), type="logical", - action="store_true", default=FALSE, - dest="save", - metavar="save", - help="Save workspace as infercnv.Rdata") + action="store_true", + dest="no_plot", + metavar="No Plot", + help=paste("don't make any of the images.", + "Instead, generate all non-image outputs as part of the run.", + "[Default %default]")) -pargs <- optparse::add_option(pargs, c("--ngchm"), +pargs <- optparse::add_option(pargs, c("--no_prelim_plot"), type="logical", - action="store_true", default=FALSE, - dest="ngchm", - metavar="NextGen_HeatMap", - help=paste("Create a Next Generation Clustered Heat Map")) - -pargs <- optparse::add_option(pargs, c("--path_to_shaidyMapGen"), - type="character", - action="store", - default=NULL, - dest="path_to_shaidyMapGen", - metavar="Path_To_ShaidyMapGenp", - help=paste("This is the pathway to the java application ShaidyMapGen.jar.", - "If this is not assigned, then an enviornmental variable that ", - "contains the ")) - -pargs <- optparse::add_option(pargs, c("--gene_symbol"), - type="character", - action="store", - default=NULL, - dest="gene_symbol", - metavar="Gene_Symbol", - help=paste("The labeling type used to represent the genes in the expression", - "data. This needs to be passed in order to add linkouts to the ", - "genes. 
Possible gene label types to choose from are specified on", - "the broadinstitute/inferCNV wiki and bmbroom/NGCHM-config-biobase.")) + action="store_true", + dest="no_prelim_plot", + metavar="No Preliminary Plot", + help=paste("don't make the preliminary infercnv image", + "[Default %default]")) +pargs <- optparse::add_option(pargs, c("--median_filter"), + type="logical", + default=FALSE, + action="store_true", + dest="median_filter", + metavar="Median Filter", + help=paste("If True, turns on additional median", + " filtering for an additional plot. ", + "[Default %default]")) -args_parsed <- optparse::parse_args(pargs, positional_arguments=2) -args <- args_parsed$options -args["input_matrix"] <- args_parsed$args[1] -args["gene_order"] <- args_parsed$args[2] +args <- optparse::parse_args(pargs) # Check arguments -args <- check_arguments(args) +#args <- check_arguments(args) +if (!is.null(args$final_scale_limits)) { + if (grepl(',', args$final_scale_limits)) { + args$final_scale_limits = as.double(strsplit(args$final_scale_limits, ",")) + } +} -# Make sure the output directory exists -if(!file.exists(args$output_dir)){ - dir.create(args$output_dir) +if (!is.null(args$ref_group_names)) { + args$ref_group_names = strsplit(args$ref_group_names, ",")[[1]] +} else { + args$ref_group_names = c() } + # Parse bounds -bounds_viz <- c(NA,NA) -if (!is.na(args$bound_threshold_vis)){ - bounds_viz <- as.numeric(unlist(strsplit(args$bound_threshold_vis,","))) -} -if (length(bounds_viz) != 2){ - error_message <- paste("Please use the correct format for the argument", - "--vis_bound_threshold . Two numbers seperated", - "by a comma is expected (lowerbound,upperbound)", - ". 
As an example, to indicate that outliers are", - "outside of -1 and 1 give the following.", - "--vis_bound_threshold -1,1") - stop(error_message) -} +#bounds_viz <- c(NA,NA) +#if (!is.na(args$bound_threshold_vis)){ +# bounds_viz <- as.numeric(unlist(strsplit(args$bound_threshold_vis,","))) +#} +#if (length(bounds_viz) != 2){ +# error_message <- paste("Please use the correct format for the argument", +# "--vis_bound_threshold . Two numbers seperated", +# "by a comma is expected (lowerbound,upperbound)", +# ". As an example, to indicate that outliers are", +# "outside of -1 and 1 give the following.", +# "--vis_bound_threshold -1,1") +# stop(error_message) +#} # Set up logging file -logging::basicConfig(level=args$log_level) -if (!is.na(args$log_file)){ - logging::addHandler(logging::writeToFile, - file=args$log_file, - level=args$log_level) -} +#logging::basicConfig(level=args$log_level) +#if (!is.na(args$log_file)){ +# logging::addHandler(logging::writeToFile, +# file=args$log_file, +# level=args$log_level) +#} # Log the input parameters logging::loginfo(paste("::Input arguments. Start.")) @@ -521,230 +692,80 @@ for (arg_name in names(args)){ } logging::loginfo(paste("::Input arguments. 
End.")) -# Manage inputs -logging::loginfo(paste("::Reading data matrix.", sep="")) -# Row = Genes/Features, Col = Cells/Observations -expression_data <- read.table(args$input_matrix, sep=args$delim, header=T, row.names=1, check.names=FALSE) -logging::loginfo(paste("Original matrix dimensions (r,c)=", - paste(dim(expression_data), collapse=","))) - -# Read in the gen_pos file -input_gene_order <- seq(1, nrow(expression_data), 1) -if (args$gene_order != ""){ - input_gene_order <- read.table(args$gene_order, header=FALSE, row.names=1, sep="\t") - names(input_gene_order) <- c(CHR, START, STOP) -} -logging::loginfo(paste("::Reading gene order.", sep="")) -logging::logdebug(paste(head(args$gene_order[1]), collapse=",")) - -# Default the reference samples to all -input_reference_samples <- colnames(expression_data) -observations_annotations_names = NULL - -if (!is.null(args$reference_observations)){ - - ## replaces OLD args$num_groups - input_classifications <- read.table(args$reference_observations, header=FALSE, row.names=1, sep="\t", stringsAsFactors = FALSE) - # sort input classifications to same order as expression_data - input_classifications <- input_classifications[order(match(row.names(input_classifications), colnames(expression_data))), , drop=FALSE] - - # make a list of list of positions that are going to be refs for each classification - name_ref_groups_indices <- list() - refs <- c() - for (name_group in args$name_ref_groups) { - name_ref_groups_indices[length(name_ref_groups_indices) + 1] <- list(which(input_classifications[,1] == name_group)) - refs <- c(refs, row.names(input_classifications[which(input_classifications[,1] == name_group), , drop=FALSE])) +infercnv_obj <- infercnv::CreateInfercnvObject(raw_counts_matrix=args$raw_counts_matrix, + gene_order_file=args$gene_order_file, + annotations_file=args$annotations_file, + ref_group_names=args$ref_group_names, + delim=args$delim, + max_cells_per_group=args$max_cells_per_group, + 
chr_exclude=args$chr_exclude) + +infercnv_obj = infercnv::run(infercnv_obj=infercnv_obj, + cutoff=args$cutoff, + min_cells_per_gene=args$min_cells_per_gene, + out_dir=args$out_dir, + analysis_mode=args$analysis_mode, + window_length=args$window_length, + smooth_method=args$smooth_method, + num_ref_groups=args$num_ref_groups, + ref_subtract_use_mean_bounds=args$ref_subtract_use_mean_bounds, + max_centered_threshold=args$max_centered_threshold, + tumor_subcluster_pval=args$tumor_subcluster_pval, + tumor_subcluster_partition_method=args$tumor_subcluster_partition_method, + HMM=args$HMM, + HMM_transition_prob=args$HMM_transition_prob, + HMM_report_by=args$HMM_report_by, + HMM_type=args$HMM_type, + # HMM_i3_z_pval=args$HMM_i3_z_pval, + #sim_method=args$sim_method, + #sim_foreground=args$sim_foreground, + scale_data=args$scale_data, + denoise=args$denoise, + noise_filter=args$noise_filter, + sd_amplifier=args$sd_amplifier, + noise_logistic=args$noise_logistic, + cluster_by_groups=args$cluster_by_groups, + k_obs_groups=args$k_obs_groups, + outlier_method_bound=args$outlier_method_bound, + outlier_lower_bound=args$outlier_lower_bound, + outlier_upper_bound=args$outlier_upper_bound, + hclust_method=args$hclust_method, + #remove_genes_at_chr_ends=args$remove_genes_at_chr_ends, + #mask_nonDE_genes=args$mask_nonDE_genes, + #mask_nonDE_pval=args$mask_nonDE_pval, + #test.use=args$test.use, + #require_DE_all_normals=args$require_DE_all_normals, + plot_steps=args$plot_steps, + no_plot=args$no_plot, + no_prelim_plot=args$no_prelim_plot, + debug=args$debug, + #prune_outliers=args$prune_outliers, + final_scale_limits=args$final_scale_limits, + final_center_val=args$final_center_val, + #reuse_subtracted=args$reuse_subtracted, + num_threads=args$num_threads#, + #hspike_aggregate_normals =args$hspike_aggregate_normals + ) + +if (args$median_filter) { + + infercnv_obj = infercnv::apply_median_filtering(infercnv_obj) + + if (is.null(args$final_scale_limits)) { + args$final_scale_limits = 
"auto" } - input_reference_samples <- unique(refs) - - all_annotations = unique(input_classifications[,1]) - observations_annotations_names = setdiff(all_annotations, args$name_ref_groups) - - # # This argument can be either a list of column labels - # # which is a comma delimited list of column labels - # # holding a comma delimited list of column labels - # refs <- args$reference_observations - # if (file.exists(args$reference_observations)){ - # refs <- scan(args$reference_observations, - # what="character", - # quiet=TRUE) - # refs <- paste(refs, collapse=",") - # } - # # Split on comma - # refs <- unique(unlist(strsplit(refs, ",", fixed=FALSE))) - # # Remove multiple spaces to single spaces - # refs <- unique(unlist(strsplit(refs, " ", fixed=FALSE))) - # refs <- refs[refs != ""] - # # Normalize names with make.names so they are treated - # # as the matrix column names - # refs <- make.names(refs) - # if (length(refs) > 0){ - # input_reference_samples <- refs - # } - logging::logdebug(paste("::Reference observations set to: ", input_reference_samples, collapse="\n")) -} - -# Make sure the given reference samples are in the matrix. -if (length(input_reference_samples) != - length(intersect(input_reference_samples, colnames(expression_data)))){ - missing_reference_sample <- setdiff(input_reference_samples, - colnames(expression_data)) - error_message <- paste("Please make sure that all the reference sample", - "names match a sample in your data matrix.", - "Attention to: ", - paste(missing_reference_sample, collapse=",")) - logging::logdebug(paste("::colnames(expression_data): ", colnames(expression_data), collapse="\n")) - logging::logerror(error_message) - stop(error_message) -} - -# Order and reduce the expression to the genomic file. 
-order_ret <- infercnv::order_reduce(data=expression_data, - genomic_position=input_gene_order) -expression_data <- order_ret$expr -input_gene_order <- order_ret$order -if(is.null(expression_data)){ - error_message <- paste("None of the genes in the expression data", - "matched the genes in the reference genomic", - "position file. Analysis Stopped.") - stop(error_message) -} - -obs_annotations_groups <- input_classifications[,1] -counter <- 1 -for (classification in observations_annotations_names) { - obs_annotations_groups[which(obs_annotations_groups == classification)] <- counter - counter <- counter + 1 -} -names(obs_annotations_groups) <- rownames(input_classifications) -obs_annotations_groups <- obs_annotations_groups[input_classifications[,1] %in% observations_annotations_names] # filter based on initial input in case some input annotations were numbers overlaping with new format -obs_annotations_groups <- as.integer(obs_annotations_groups) - -if (args$save) { - logging::loginfo("Saving workspace") - save.image("infercnv.Rdata") -} - -# Make sure the required java application ShaidyMapGen.jar exists. 
-if (args$ngchm){ - ## if argument is passed, check if file exists - if (!is.null(args$path_to_shaidyMapGen)) { - if (!file.exists(args$path_to_shaidyMapGen)){ - error_message <- paste("Cannot find the file ShaidyMapGen.jar using path_to_shaidyMapGen.", - "Make sure the entire pathway is being used.") - logging::logerror(error_message) - stop(error_message) - } else { - shaidy.path <- unlist(strsplit(args$path_to_shaidyMapGen, split = .Platform$file.sep)) - if (tail(shaidy.path, n = 1L) != "ShaidyMapGen.jar") { - stop("Check pathway to ShaidyMapGen: ", args$path_to_shaidyMapGen, - "\n Make sure to add 'ShaidyMapGen.jar' to the end of the path.") - } - } - } else { - ## check if envionrmental variable is passed and check if file exists - if(exists("SHAIDYMAPGEN")) { - if (!file.exists(SHAIDYMAPGEN)){ - error_message <- paste("Cannot find the file ShaidyMapGen.jar using SHAIDYMAPGEN.", - "Make sure the entire pathway is being used.") - logging::logerror(error_message) - stop(error_message) - } else { - args$path_to_shaidyMapGen <- SHAIDYMAPGEN - } - } - if (Sys.getenv("SHAIDYMAPGEN") != "") { - if (!file.exists(Sys.getenv("SHAIDYMAPGEN"))){ - error_message <- paste("Cannot find the file ShaidyMapGen.jar using SHAIDYMAPGEN.", - "Make sure the entire pathway is being used.") - logging::logerror(error_message) - stop(error_message) - } else { - args$path_to_shaidyMapGen <- Sys.getenv("SHAIDYMAPGEN") - } - } - } -} - -# Run CNV inference -ret_list = infercnv::infer_cnv(data=expression_data, - gene_order=input_gene_order, - cutoff=args$cutoff, - reference_obs=input_reference_samples, - transform_data=args$log_transform, - window_length=args$window_length, - max_centered_threshold=args$max_centered_expression, - noise_threshold=args$magnitude_filter, - name_ref_groups=args$name_ref_groups, - num_ref_groups=name_ref_groups_indices, - obs_annotations_groups=obs_annotations_groups, - out_path=args$output_dir, - k_obs_groups=args$num_obs_groups, - plot_steps=args$plot_steps, - 
contig_tail=args$contig_tail, - method_bound_vis=args$bound_method_vis, - lower_bound_vis=bounds_viz[1], - upper_bound_vis=bounds_viz[2], - ref_subtract_method=args$ref_subtract_method, - hclust_method=args$hclust_method) - -# Log output -logging::loginfo(paste("::infer_cnv:Writing final data to ", - file.path(args$output_dir, - "expression_pre_vis_transform.txt"), sep="_")) -# Output data before viz outlier -write.table(ret_list["PREVIZ"], sep=args$delim, - file=file.path(args$output_dir, - "expression_pre_vis_transform.txt")) -# Output data after viz outlier -write.table(ret_list["VIZ"], sep=args$delim, - file=file.path(args$output_dir, - "expression_post_viz_transform.txt")) -logging::loginfo(paste("::infer_cnv:Current data dimensions (r,c)=", - paste(dim(ret_list[["VIZ"]]), collapse=","), sep="")) - -logging::loginfo(paste("::infer_cnv:Drawing plots to file:", - args$output_dir, sep="")) - - -if (args$save) { - logging::loginfo("Saving workspace") - save.image("infercnv.Rdata") -} - - -if (args$plot_steps) { - logging::loginfo("See results from each stage plotted separately") -} else { - - infercnv::plot_cnv(plot_data=ret_list[["VIZ"]], - contigs=ret_list[["CONTIGS"]], - k_obs_groups=args$num_obs_groups, - obs_annotations_groups=obs_annotations_groups, - reference_idx=ret_list[["REF_OBS_IDX"]], - ref_contig=args$clustering_contig, - contig_cex=args$contig_label_size, - ref_groups=ret_list[["REF_GROUPS"]], - out_dir=args$output_dir, - color_safe_pal=args$use_color_safe, - hclust_method=args$hclust_method, - title=args$fig_main, - obs_title=args$obs_main, - ref_title=args$ref_main) - -} + if (is.null(args$final_center_val)) { + args$final_center_val = 1 + } + + plot_cnv(infercnv_obj, + k_obs_groups=args$k_obs_groups, + cluster_by_groups=args$cluster_by_groups, + out_dir=args$out_dir, + x.center=args$final_center_val, + x.range=args$final_scale_limits, + title="inferCNV", + output_filename="infercnv_median_filtered", + write_expr_matrix=TRUE) -if (args$ngchm) { - 
logging::loginfo("Creating NGCHM as infercnv.ngchm") - infercnv::Create_NGCHM(plot_data = ret_list[["VIZ"]], - path_to_shaidyMapGen = args$path_to_shaidyMapGen, - reference_idx = ret_list[["REF_OBS_IDX"]], - ref_index = name_ref_groups_indices, - location_data = input_gene_order, - out_dir = args$output_dir, - contigs = ret_list[["CONTIGS"]], - ref_groups = ret_list[["REF_GROUPS"]], - title = args$fig_main, - gene_symbol = ards$gene_symbol) } - diff --git a/scripts/inferCNV_to_HB.R b/scripts/inferCNV_to_HB.R new file mode 100755 index 00000000..d2233a70 --- /dev/null +++ b/scripts/inferCNV_to_HB.R @@ -0,0 +1,103 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +parser$add_argument("--no_scale_data", help="dont scale the data (ie. already scaled)", required=F, action='store_true', default=FALSE) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(futile.logger) +library(HoneyBADGER) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +require(biomaRt) ## for gene coordinates +mart.obj <- useMart(biomart = "ENSEMBL_MART_ENSEMBL", + dataset = 'hsapiens_gene_ensembl', + host = "jul2015.archive.ensembl.org") + +do_scale=TRUE +if (args$no_scale_data) { + do_scale=FALSE +} + + +run_hbadger <- function(tumor_group_name, normal_matrix, tumor_matrix) { + + hb <- new('HoneyBADGER', name=tumor_group_name) + + ref_normal <- rowMeans(normal_matrix) + + hb$setGexpMats(tumor_matrix, ref_normal, mart.obj, filter=FALSE, scale=do_scale, verbose=TRUE) + + pdf(sprintf("%s-hb.pdf", tumor_group_name)) + + hb$plotGexpProfile() ## initial visualization + + + hb$setMvFit(verbose=TRUE) + hb$setGexpDev(verbose=TRUE) + hb$calcGexpCnvBoundaries(init=TRUE, verbose=FALSE) + + + ## double check what CNVs were identified + bgf <- hb$bound.genes.final + genes <- hb$genes + regions.genes <- 
range(genes[unlist(bgf)]) + + print(regions.genes) + + if (length(regions.genes) == 0) { + message("No cnv regions identified") + return() + } + + ## Indeed, our initial HMM has identified a number of candidate CNVs to test. We can now retest all identified CNVs on all cells to derive the final posterior probability of each CNV in each cell. We can cluster cells on these posterior probabilities and visualize them as a heatmap. + + hb$retestIdentifiedCnvs(retestBoundGenes = TRUE, retestBoundSnps = FALSE, verbose=FALSE) + + ## look at final results + results <- hb$summarizeResults(geneBased=TRUE, alleleBased=FALSE) + print(head(results[,1:7])) + write.table(results[,1:7], sprintf("%s-hb.cnvs.tsv", tumor_group_name), quote=F, sep="\t") + + + ## visualize as heatmap + trees <- hb$visualizeResults(geneBased=TRUE, alleleBased=FALSE, details=TRUE, margins=c(25,15)) + + ## order cells + hc <- trees$hc + order <- hc$labels[hc$order] + ## plot all chromosomes + hb$plotGexpProfile(cellOrder=order) + + + ## plot just identified cnvs + hb$plotGexpProfile(cellOrder=order, region=hb$cnvs[['gene-based']][['amp']]) + + hb$plotGexpProfile(cellOrder=order, region=hb$cnvs[['gene-based']][['del']]) + + +} + + + + +normal_matrix = infercnv_obj@expr.data[, unlist(infercnv_obj@reference_grouped_cell_indices), drop=F] + +tumor_groups = infercnv_obj@observation_grouped_cell_indices + +tumor_group_names = names(tumor_groups) +tumor_group_name = tumor_group_names[1] # for debugging +for (tumor_group_name in tumor_group_names) { + tumor_grp_idx = tumor_groups[[tumor_group_name]] + + tumor_matrix = infercnv_obj@expr.data[,tumor_grp_idx] + + run_hbadger(tumor_group_name, normal_matrix, tumor_matrix) +} diff --git a/scripts/infercnv_obj_to_input_files.R b/scripts/infercnv_obj_to_input_files.R new file mode 100755 index 00000000..246f5650 --- /dev/null +++ b/scripts/infercnv_obj_to_input_files.R @@ -0,0 +1,65 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + 
+parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +## write counts matrix +write.table(infercnv_obj@count.data, file='sc.counts.matrix', quote=F, sep="\t") + +cellnames = colnames(infercnv_obj@count.data) + +groupings = c(infercnv_obj@reference_grouped_cell_indices, infercnv_obj@observation_grouped_cell_indices) + +## write cell annotation file +cell.annots = do.call(rbind, lapply(names(groupings), function(groupname) { + cell_idx = groupings[[ groupname ]] + group.cellnames = cellnames[cell_idx] + + return(data.frame(cells=group.cellnames, type=groupname)) +})) + +cell.annots = cell.annots[ cell.annots$cells %in% colnames(infercnv_obj@count.data), ] + +write.table(cell.annots, file="cell_annots.txt", quote=F, row.names=F, col.names=F, sep="\t") + +## write infercnv runner: + +cat(file='run.infercnv.R', sprintf("#!/usr/bin/env Rscript + +options(error = function() { traceback(2); q(status = 1) } ) + +library(\"infercnv\") + +# create the infercnv object +infercnv_obj = CreateInfercnvObject(raw_counts_matrix=\"sc.counts.matrix\", + annotations_file=\"cell_annots.txt\", + delim=\"\t\", + gene_order_file=\"gencode_v19_gene_pos.txt\", + ref_group_names=c(\'%s\')) + +out_dir=\"output_dir\" +# perform infercnv operations to reveal cnv signal +infercnv_obj = infercnv::run(infercnv_obj, + cutoff=1, # cutoff=1 works well for Smart-seq2, and cutoff=0.1 works well for 10x Genomics + out_dir=out_dir, + cluster_by_groups=T, + plot_steps=T, + HMM=T, + #HMM_mode='subclusters', + HMM_mode='samples', + sim_method='meanvar' + ) +", paste(names(infercnv_obj@reference_grouped_cell_indices),collapse="','") ) ) + +Sys.chmod('run.infercnv.R', mode = "0775") + + diff --git a/scripts/matrix_to_ideogram_annots.py b/scripts/matrix_to_ideogram_annots.py deleted file mode 100755 index 
7f327d57..00000000 --- a/scripts/matrix_to_ideogram_annots.py +++ /dev/null @@ -1,266 +0,0 @@ -#!/usr/bin/env python - -"""Converts clustered gene expression matrices to Ideogram.js annotations -""" - -__author__ = 'Eric Weitz, Jonathan Bistline, Timothy Tickle' -__copyright__ = 'Copyright 2018' -__credits__ = ['Eric Weitz'] -__license__ = 'BSD-3' -__maintainer__ = 'Eric Weitz' -__email__ = 'eweitz@bbroadinstitute.org' -__status__ = 'Development' - -from argparse import ArgumentParser, RawDescriptionHelpFormatter -import json -from statistics import mean - - -class MatrixToIdeogramAnnots: - - def __init__(self, infercnv_output, infercnv_delimiter, gen_pos_file, - clusters_meta, output_file): - """Class and parameter docs in module summary and argument parser""" - - self.infercnv_output = infercnv_output - self.infercnv_delimiter = infercnv_delimiter - self.clusters = self.get_clusters(clusters_meta) - self.output_file = output_file - self.genomic_position_file_path = gen_pos_file - - self.genes = self.get_genes() - - self.write_ideogram_annots() - - def write_ideogram_annots(self): - """Write Ideogram.js annotations JSON data to specified output file""" - - ideogram_annots = self.get_ideogram_annots() - - ideogram_annots_json = json.dumps(ideogram_annots) - - with open(self.output_file, 'w') as f: - f.write(ideogram_annots_json) - - print('Wrote Ideogram.js annotations to ' + self.output_file) - - def get_ideogram_annots(self): - """Get Ideogram.js annotations from inferCNV and cluster data - - Format and other details of Ideogram.js annotations: - https://github.com/eweitz/ideogram/wiki/Annotations - """ - - genes = self.genes - - expression_means = self.compute_gene_expression_means() - - keys = ['name', 'start', 'length'] - keys += ['all'] + list(self.clusters.keys()) # cluster names - - annots_by_chr = {} - - for i, expression_mean in enumerate(expression_means[1:]): - gene_id = expression_mean[0] - gene = genes[gene_id] - - chr = gene['chr'] - start = 
int(gene['start']) - stop = int(gene['stop']) - length = stop - start - - if chr not in annots_by_chr: - annots_by_chr[chr] = [] - - annot = [gene_id, start, length] - - if i % 1000 == 0 and i != 0: - print('Constructed ' + str(i) + ' of ' + str(len(expression_means) - 1) + ' annots') - - annot += expression_mean[1:] - - annots_by_chr[chr].append(annot) - - annots_list = [] - - for chr in annots_by_chr: - annots = annots_by_chr[chr] - annots_list.append({'chr': chr, 'annots': annots}) - - ideogram_annots = {'keys': keys, 'annots': annots_list} - - return ideogram_annots - - def get_genes(self): - """Convert inferCNV genomic position file into useful 'genes' dict""" - - genes = {} - - with open(self.genomic_position_file_path) as f: - lines = f.readlines() - - for line in lines: - columns = line.strip().split() - id, chr, start, stop = columns - genes[id] = { - 'id': id, - 'chr': chr, - 'start': start, - 'stop': stop - } - - return genes - - def get_expression_matrix_dict(self): - """Parse inferCNV output, return dict of cell expressions by gene""" - print(self.get_expression_matrix_dict.__doc__) - - em_dict = {} - - with open(self.infercnv_output) as f: - lines = f.readlines() - - cells_dict = {} - - cells_list = lines[0].strip().split(self.infercnv_delimiter) - - for i, cell in enumerate(cells_list): - cell = cell.strip('"').split('PREVIZ.')[1].replace('.', '-') # "PREVIZ.AAACATACAAGGGC.1" -> AAACATACAAGGGC-1 - cells_dict[cell] = i - - em_dict['cells'] = cells_dict - genes = {} - - for line in lines[1:]: - columns = line.strip().split(self.infercnv_delimiter) - gene = columns[0].strip('"') - expression_by_cell = list(map(float, columns[1:])) - - genes[gene] = expression_by_cell - - em_dict['genes'] = genes - - return em_dict - - def get_clusters(self, clusters_meta): - """Assign cells from expression matrix to the appropriate cluster""" - - clusters = clusters_meta - - for name in clusters: - - cells = [] - - clusters[name]['cells'] = [] - cluster_path = 
clusters[name]['path'] - - with open(cluster_path) as f: - lines = f.readlines() - - for line in lines[3:]: - cell = line.split()[0] - cells.append(cell) - - clusters[name]['cells'] = cells - - return clusters - - - def compute_gene_expression_means(self): - """Compute mean expression for each gene across all and each cluster""" - - scores_lists = [] - - cluster_names = list(self.clusters.keys()) - - keys = ['name', 'all'] + cluster_names - scores_lists.append(keys) - - matrix = self.get_expression_matrix_dict() - - cells = matrix['cells'] - clusters = self.clusters - - gene_expression_lists = matrix['genes'] - - # For each gene, get its mean expression across all cells, - # then iterate through each cluster (a.k.a. ordination), - # and get the mean expression across all cells in that cluster - for i, gene in enumerate(gene_expression_lists): - - gene_exp_list = gene_expression_lists[gene] - mean_expression_all = round(mean(gene_exp_list), 3) - - scores_list = [gene, mean_expression_all] - - for name in clusters: - cluster = clusters[name] - cluster_expressions = [] - for cluster_cell in cluster['cells']: - # if cluster_cell in cells: - index_of_cell_in_matrix = cells[cluster_cell] - 1 - # else: - # if i == 0: - # print(cluster_cell + ' from ' + name + ' not found in expression matrix') - # continue - gene_exp_in_cell = gene_exp_list[index_of_cell_in_matrix] - cluster_expressions.append(gene_exp_in_cell) - - mean_cluster_expression = round(mean(cluster_expressions), 3) - scores_list.append(mean_cluster_expression) - - if i % 1000 == 0 and i != 0: - print('Processed ' + str(i) + ' of ' + str(len(gene_expression_lists))) - - scores_lists.append(scores_list) - - return scores_lists - - -def get_clusters_meta(names, paths): - """Organize cluster args provided via CLI into a more convenient dict""" - - if len(names) != len(paths): - raise ValueError('Number of cluster names must equal length of cluster paths') - - clusters_meta = {} - - for i, name in enumerate(names): 
- clusters_meta[name] = {'path': paths[i]} - - return clusters_meta - - -if __name__ == '__main__': - - # Parse command-line arguments - ap = ArgumentParser(description=__doc__, # Use text from file summary up top - formatter_class=RawDescriptionHelpFormatter) - ap.add_argument('--infercnv_output', - help='Path to pre_vis_transform.txt output from inferCNV') - ap.add_argument('--infercnv_delimiter', - help='Delimiter in pre_vis_transform.txt output from inferCNV. Default: \\t', - default='\t') - ap.add_argument('--gen_pos_file', - help='Path to gen_pos.txt genomic positions file from inferCNV ') - ap.add_argument('--cluster_names', - help='List of cluster names', - nargs='+') # List must have one or more items - ap.add_argument('--cluster_paths', - help='List of cluster paths or URLs', - nargs='+') - ap.add_argument('--output_file', - help='Path for write output') - - args = ap.parse_args() - - infercnv_output = args.infercnv_output - infercnv_delimiter = args.infercnv_delimiter - gen_pos_file = args.gen_pos_file - cluster_names = args.cluster_names - cluster_paths = args.cluster_paths - output_file = args.output_file - - clusters_meta = get_clusters_meta(cluster_names, cluster_paths) - - MatrixToIdeogramAnnots(infercnv_output, infercnv_delimiter, gen_pos_file, clusters_meta, output_file) diff --git a/scripts/meanvar_sim_counts.R b/scripts/meanvar_sim_counts.R new file mode 100755 index 00000000..75e251d5 --- /dev/null +++ b/scripts/meanvar_sim_counts.R @@ -0,0 +1,62 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) +options(error = function() {traceback(2);quit(save = "no", status = 0, runLast = FALSE)}) + +parser = ArgumentParser() + +parser$add_argument("--infercnv_obj", help="total sum normalized infercnv obj", required=TRUE, default=NULL, nargs=1) +parser$add_argument("--ncells", help="number of cells to simulate", required=FALSE, type='integer', nargs=1, default=-1) +parser$add_argument("--ngenes", help="number of genes to 
simulate", required=FALSE, type='integer', nargs=1, default=-1) +parser$add_argument("--output_prefix", help='prefix for output matrix file', required=TRUE, nargs=1) + +args = parser$parse_args() + +library(infercnv) +library(SingleCellExperiment) +library("methods") +library(tidyverse) + + +infercnv_obj_file = args$infercnv_obj + +ncells = args$ncells +ngenes = args$ngenes +output_prefix = args$output_prefix + +infercnv_obj = readRDS(infercnv_obj_file) + +expr.data = infercnv_obj@expr.data[, unlist(infercnv_obj@reference_grouped_cell_indices)] + +if (ncells < 0) { + ncells = ncol(expr.data) +} +if (ngenes < 0) { + ngenes = nrow(expr.data) +} + +## sim using specified gene means +gene_means = rowMeans(expr.data) +gene_means = gene_means[gene_means>0] + +gene_means = sample(x=gene_means, size=ngenes, replace=T) + +newnames = paste0('gene', 1:ngenes) + +names(gene_means) = newnames + + +sim_matrix <- infercnv:::.get_simulated_cell_matrix_using_meanvar_trend(infercnv_obj, gene_means, ncells, TRUE) + + +output_filename = paste0(output_prefix, ".counts.matrix") +write.table(sim_matrix, file=output_filename, quote=F, sep='\t') + +pdf(paste0(output_prefix, ".KS.pdf")) +infercnv:::KS_plot("meanVarSim", as.numeric(log(expr.data+1)), as.numeric(log(sim_matrix+1))) + + + + + + diff --git a/scripts/plot_hspike.R b/scripts/plot_hspike.R new file mode 100755 index 00000000..aed928c3 --- /dev/null +++ b/scripts/plot_hspike.R @@ -0,0 +1,36 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +if (! 
is.null(infercnv_obj@.hspike)) { + out_prefix = paste0(infercnv_obj_file, '.hspike') + plot_cnv(infercnv_obj@.hspike, + out_dir=dirname(infercnv_obj_file), + output_filename=basename(out_prefix) ) + + + hspike_obj = infercnv_obj@.hspike + hspike_gene_expr_by_cnv <- infercnv:::.get_gene_expr_by_cnv(hspike_obj) + hspike_cnv_mean_sd <- infercnv:::.get_gene_expr_mean_sd_by_cnv(hspike_gene_expr_by_cnv) + p = infercnv:::.plot_gene_expr_by_cnv(gene_expr_by_cnv=hspike_gene_expr_by_cnv, cnv_mean_sd=hspike_cnv_mean_sd) + pdf(paste0(infercnv_obj_file, '.hspike.dist.pdf')) + plot(p) + dev.off() + +} else { + message("no hspike to plot") +} + + + diff --git a/scripts/plot_hspike.by_num_cells.R b/scripts/plot_hspike.by_num_cells.R new file mode 100755 index 00000000..650406f9 --- /dev/null +++ b/scripts/plot_hspike.by_num_cells.R @@ -0,0 +1,78 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(dplyr) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +if (! 
is.null(infercnv_obj@.hspike)) { + hspike_obj = infercnv_obj@.hspike + + + pdf(paste0(infercnv_obj_file, '.hspike.dist_by_numcells.pdf')) + + + + + gene_expr_by_cnv <- infercnv:::.get_gene_expr_by_cnv(hspike_obj) + cnv_level_to_mean_sd = list() + + for (ncells in c(1,2,3,4,5,10,20,50,100)) { + + cnv_to_means = list() + cnv_mean_sd = list() + + for (cnv_level in names(gene_expr_by_cnv) ) { + expr_vals = gene_expr_by_cnv[[ cnv_level ]] + nrounds = 100 + + means = c() + + for(i in 1:nrounds) { + vals = sample(expr_vals, size=ncells, replace=T) + m_val = mean(vals) + means = c(means, m_val) + } + cnv_to_means[[ cnv_level ]] = means + cnv_mean_sd[[ cnv_level ]] = list(sd=sd(means), mean=mean(means)) + } + + ## plot + + df = do.call(rbind, lapply(names(cnv_to_means), function(x) { data.frame(cnv=x, expr=cnv_to_means[[x]]) })) + + p = df %>% ggplot(aes(expr, fill=cnv, colour=cnv)) + geom_density(alpha=0.1) + + p = p + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:0.01"]]$mean,'sd'=cnv_mean_sd[["cnv:0.01"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:0.5"]]$mean,'sd'=cnv_mean_sd[["cnv:0.5"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:1"]]$mean,'sd'=cnv_mean_sd[["cnv:1"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:1.5"]]$mean,'sd'=cnv_mean_sd[["cnv:1.5"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:2"]]$mean,'sd'=cnv_mean_sd[["cnv:2"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:3"]]$mean,'sd'=cnv_mean_sd[["cnv:3"]]$sd)) + + p = p + ggtitle(sprintf("num cells: %g", ncells)) + + plot(p) + + + } + + + dev.off() + +} else { + message("no hspike to plot") +} + + + diff --git a/scripts/plot_hspike.diff_normal_tumor.R b/scripts/plot_hspike.diff_normal_tumor.R new file mode 100755 index 00000000..b2569665 --- /dev/null +++ 
b/scripts/plot_hspike.diff_normal_tumor.R @@ -0,0 +1,42 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +if (! is.null(infercnv_obj@.hspike)) { + pdfname = paste0(infercnv_obj_file, '.hspike.diff_normal_tumor.pdf') + + pdf(pdfname) + hspike = infercnv_obj@.hspike + + normal_matrix = hspike@expr.data[,unlist(hspike@reference_grouped_cell_indices)] + tumor_matrix = hspike@expr.data[,unlist(hspike@observation_grouped_cell_indices)] + + normal.means = rowMeans(normal_matrix) + tumor.means = rowMeans(tumor_matrix) + + plot(normal.means, ylim=range(normal.means, tumor.means)) + points(tumor.means, col='green') + + plot(tumor.means - normal.means) + abline(h=0, col='red') + + sm = caTools::runmean(tumor.means - normal.means, k=31) + points(sm, col='magenta') + +} else { + message("no hspike to plot") +} + + + diff --git a/scripts/plot_hspike_vs_sample_chrs.R b/scripts/plot_hspike_vs_sample_chrs.R new file mode 100755 index 00000000..658d4bd8 --- /dev/null +++ b/scripts/plot_hspike_vs_sample_chrs.R @@ -0,0 +1,74 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) + +args = parser$parse_args() + +library(infercnv) +library(futile.logger) +library(tidyverse) + + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +gene_order = infercnv_obj@gene_order +gene_order = cbind(gene_order, gene=rownames(gene_order)) + +cnv_to_expr_vals = list() + +expr.data <- infercnv_obj@expr.data + +cnv_mean_sd = infercnv:::get_spike_dists(infercnv_obj@.hspike) + +chrs = unique(infercnv_obj@gene_order$chr) + +groups 
= c(infercnv_obj@observation_grouped_cell_indices, infercnv_obj@reference_grouped_cell_indices) + +samples = names(groups) + + +for (sample in samples) { + pdf_name = sprintf("%s-%s.cnv_expr_densities_each_chr.pdf", infercnv_obj_file, sub("[^A-Za-z0-9]", "_", sample, perl=TRUE)) + pdf(pdf_name) + + message(sprintf("plotting sample: %s", sample)) + + sample_cells = groups[[ sample ]] + + sample_expr = expr.data[, sample_cells] + + for (chr in chrs) { + chr_gene_idx = which(infercnv_obj@gene_order$chr == chr) + + sample_gene_expr = sample_expr[chr_gene_idx,] + + normal_gene_expr = expr.data[chr_gene_idx, unlist(infercnv_obj@reference_grouped_cell_indices)] + + df = rbind(data.frame(class='allnormal', vals=as.numeric(normal_gene_expr) ), + data.frame(class='sample', vals=as.numeric(sample_gene_expr)) ) + + message(sprintf("plotting sample: %s, %s", sample, chr)) + + p = df %>% ggplot(aes(vals, fill=class)) + geom_density(alpha=0.3) + ggtitle(sprintf("%s, %s", sample, chr)) + + p = p + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:0.01"]]$mean,'sd'=cnv_mean_sd[["cnv:0.01"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:0.5"]]$mean,'sd'=cnv_mean_sd[["cnv:0.5"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:1"]]$mean,'sd'=cnv_mean_sd[["cnv:1"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:1.5"]]$mean,'sd'=cnv_mean_sd[["cnv:1.5"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:2"]]$mean,'sd'=cnv_mean_sd[["cnv:2"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:3"]]$mean,'sd'=cnv_mean_sd[["cnv:3"]]$sd)) + + + + plot(p) + + } + dev.off() +} + diff --git a/scripts/plot_infercnv_obj.R b/scripts/plot_infercnv_obj.R new file mode 100755 index 00000000..dc64391c --- /dev/null +++ b/scripts/plot_infercnv_obj.R @@ -0,0 +1,17 @@ +#!/usr/bin/env Rscript + 
+suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +plot_cnv(infercnv_obj, + output_filename=basename(infercnv_obj_file)) + diff --git a/scripts/plot_tumor_vs_normal_chr_densities.R b/scripts/plot_tumor_vs_normal_chr_densities.R new file mode 100755 index 00000000..b2eadfe7 --- /dev/null +++ b/scripts/plot_tumor_vs_normal_chr_densities.R @@ -0,0 +1,61 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(futile.logger) +library(dplyr) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +ref_group_cell_indices = infercnv:::get_reference_grouped_cell_indices(infercnv_obj) +pdf_filename = paste0(infercnv_obj_file, ".chr_expr_densities.pdf") + +cnv_mean_sd = infercnv:::get_spike_dists(infercnv_obj@.hspike) + +pdf(pdf_filename) + +chrs = unique(infercnv_obj@gene_order$chr) + + +for (chr in chrs) { + + gene_idx = which(infercnv_obj@gene_order$chr == chr) + + ref_data_pts = as.numeric(infercnv_obj@expr.data[gene_idx,ref_group_cell_indices]) + + df = data.frame(class='normal', vals=ref_data_pts) + + for (tumor in names(infercnv_obj@observation_grouped_cell_indices) ) { + + tumor_cell_idx = infercnv_obj@observation_grouped_cell_indices[[ tumor ]] + tumor_data_pts = as.numeric(infercnv_obj@expr.data[gene_idx, tumor_cell_idx]) + + df = rbind(df, data.frame(class=tumor, vals=tumor_data_pts)) + } + + flog.info(sprintf("Plotting data for chr: %s", chr)) + + p = df %>% ggplot(aes(vals, fill=class)) + geom_density(alpha=0.3) + ggtitle(chr) # + 
scale_y_continuous(trans='log10', limits=c(1,NA)) + + + p = p + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:0.01"]]$mean,'sd'=cnv_mean_sd[["cnv:0.01"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:0.5"]]$mean,'sd'=cnv_mean_sd[["cnv:0.5"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:1"]]$mean,'sd'=cnv_mean_sd[["cnv:1"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:1.5"]]$mean,'sd'=cnv_mean_sd[["cnv:1.5"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:2"]]$mean,'sd'=cnv_mean_sd[["cnv:2"]]$sd)) + + stat_function(fun=dnorm, color='black', args=list('mean'=cnv_mean_sd[["cnv:3"]]$mean,'sd'=cnv_mean_sd[["cnv:3"]]$sd)) + + + + plot(p) +} + diff --git a/scripts/plot_tumor_vs_normal_chr_densities.i3.R b/scripts/plot_tumor_vs_normal_chr_densities.i3.R new file mode 100755 index 00000000..0c31207f --- /dev/null +++ b/scripts/plot_tumor_vs_normal_chr_densities.i3.R @@ -0,0 +1,63 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(futile.logger) +library(dplyr) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +ref_group_cell_indices = infercnv:::get_reference_grouped_cell_indices(infercnv_obj) +pdf_filename = paste0(infercnv_obj_file, ".i3.chr_expr_densities.pdf") + +normal_sd_trend = infercnv:::.i3HMM_get_sd_trend_by_num_cells_fit(infercnv_obj) + +mu = normal_sd_trend$mu +sigma = normal_sd_trend$sigma + + + +pdf(pdf_filename) + +chrs = unique(infercnv_obj@gene_order$chr) + +delta = infercnv:::get_HoneyBADGER_setGexpDev(gexp.sd=sigma, alpha=0.05, k_cells=7) + +for (chr in chrs) { + + gene_idx = which(infercnv_obj@gene_order$chr == chr) 
+ + ref_data_pts = as.numeric(infercnv_obj@expr.data[gene_idx,ref_group_cell_indices]) + + df = data.frame(class='normal', vals=ref_data_pts) + + for (tumor in names(infercnv_obj@observation_grouped_cell_indices) ) { + + tumor_cell_idx = infercnv_obj@observation_grouped_cell_indices[[ tumor ]] + tumor_data_pts = as.numeric(infercnv_obj@expr.data[gene_idx, tumor_cell_idx]) + + df = rbind(df, data.frame(class=tumor, vals=tumor_data_pts)) + } + + flog.info(sprintf("Plotting data for chr: %s", chr)) + + p = df %>% ggplot(aes(vals, fill=class)) + geom_density(alpha=0.3) + ggtitle(chr) # + scale_y_continuous(trans='log10', limits=c(1,NA)) + + + p = p + + stat_function(fun=dnorm, color='black', args=list('mean'=mu,'sd'=sigma)) + + stat_function(fun=dnorm, color='blue', args=list('mean'=mu-delta,'sd'=sigma)) + + stat_function(fun=dnorm, color='blue', args=list('mean'=mu+delta,'sd'=sigma)) + + + plot(p) +} + diff --git a/scripts/recursive_random_tree_height_cutting.random_trees.R b/scripts/recursive_random_tree_height_cutting.random_trees.R new file mode 100755 index 00000000..f794e5f0 --- /dev/null +++ b/scripts/recursive_random_tree_height_cutting.random_trees.R @@ -0,0 +1,30 @@ +#!/usr/bin/env Rscript + + +hclust_method='ward.D2' + +num_rand_iters = 100 +MAX_PVAL=0.05 + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(futile.logger) +library(pheatmap) + +infercnv_obj = readRDS(args$infercnv_obj) + + +pdf("test.recursive_trees.pdf") + +adj.obj = infercnv:::define_signif_tumor_subclusters(infercnv_obj, p_val=0.05, hclust_method='ward.D2', partition_method='random_trees') + + + + + diff --git a/scripts/recursive_random_tree_height_cutting.sigclust2.R b/scripts/recursive_random_tree_height_cutting.sigclust2.R new file mode 100755 index 00000000..0a548284 --- /dev/null +++ 
b/scripts/recursive_random_tree_height_cutting.sigclust2.R @@ -0,0 +1,83 @@ +#!/usr/bin/env Rscript + + +hclust_method='ward.D2' + +num_rand_iters = 100 +MAX_PVAL=0.05 + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(futile.logger) +library(pheatmap) + +obj = readRDS(args$infercnv_obj) + +tumor.expr.data = obj@expr.data[, unlist(obj@observation_grouped_cell_indices)] + +gene_order = obj@gene_order +chrs = unique(gene_order$chr) + + +pdf("test.recursive_trees.pdf") + + +ALL_CLUSTERS = list() +MIN_CLUSTER_SIZE=3 + +library(sigclust2) + +recursive_cluster_cutting <- function(expr.matrix) { + + message("recursive_cluster_cutting()") + print(dim(expr.matrix)) + + if (dim(expr.matrix)[2] < MIN_CLUSTER_SIZE) { + message("cluster size too small. Storing cluster") + ALL_CLUSTERS[[length(ALL_CLUSTERS)+1]] <<- colnames(expr.matrix) + + print("Returning") + return(NULL) + print("Didn't actually return...") + } + + print("Onward") + print(dim(expr.matrix)) + + t_tumor.expr.data = t(expr.matrix) # cells as rows, genes as cols + + shc_result = shc(t_tumor.expr.data, metric='euclidean', linkage='ward.D2') + plot(shc_result) + + for(chr in chrs) { + chr_genes = which(gene_order$chr == chr) + + message(sprintf("plotting %s", chr)) + + shc_result = shc(t_tumor.expr.data[,chr_genes], metric='euclidean', linkage='ward.D2') + plot(shc_result) + } + + + +} + +recursive_cluster_cutting(tumor.expr.data) + +dev.off() + +print(ALL_CLUSTERS) + + + + + + + + diff --git a/scripts/recursive_random_tree_height_cutting.using_hmms.R b/scripts/recursive_random_tree_height_cutting.using_hmms.R new file mode 100755 index 00000000..40302ff9 --- /dev/null +++ b/scripts/recursive_random_tree_height_cutting.using_hmms.R @@ -0,0 +1,155 @@ +#!/usr/bin/env Rscript + + +hclust_method='ward.D2' + +num_rand_iters = 
100 +MAX_PVAL=0.05 + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(futile.logger) +library(pheatmap) + +obj = readRDS(args$infercnv_obj) + +tumor.expr.data = obj@expr.data[, unlist(obj@observation_grouped_cell_indices)] + +gene_order = obj@gene_order +chrs = unique(gene_order$chr) + +tumor.expr.data[tumor.expr.data>3] <- 4 +tumor.expr.data[tumor.expr.data<3] <- 2 + + +pdf("test.recursive_trees.pdf") + + +ALL_CLUSTERS = list() +MIN_CLUSTER_SIZE=3 + + +recursive_cluster_cutting <- function(expr.matrix) { + + message("recursive_cluster_cutting()") + print(dim(expr.matrix)) + + if (dim(expr.matrix)[2] < MIN_CLUSTER_SIZE) { + message("cluster size too small. Storing cluster") + ALL_CLUSTERS[[length(ALL_CLUSTERS)+1]] <<- colnames(expr.matrix) + + print("Returning") + return(NULL) + print("Didn't actually return...") + } + + print("Onward") + print(dim(expr.matrix)) + + t_tumor.expr.data = t(expr.matrix) # cells as rows, genes as cols + d = dist(t_tumor.expr.data) + + h_obs = hclust(d, method=hclust_method) + + # permute by chromosomes + + permute_chr_col_vals <- function(df) { + + num_cells = nrow(df) + + for(chr in chrs) { + chr_genes = which(gene_order$chr == chr) + + df[, chr_genes] = df[sample(x=1:num_cells, size=num_cells, replace=F), chr_genes] + } + + df + } + + permute_col_vals <- function(df) { + + num_cells = nrow(df) + for (i in 1:ncol(df)) { + df[,i] = df[sample(x=1:num_cells, size=num_cells, replace=F), i] + } + + df + } + + + example_rand_matrix <- NULL + max_rand_heights = c() + for (i in 1:num_rand_iters) { + + ##rand.tumor.expr.data = permute_chr_col_vals(t_tumor.expr.data) + rand.tumor.expr.data = permute_col_vals(t_tumor.expr.data) + example_rand_matrix <- rand.tumor.expr.data + rand.dist = dist(rand.tumor.expr.data) + h_rand <- hclust(rand.dist, 
method=hclust_method) + + max_rand_heights = c(max_rand_heights, max(h_rand$height)) + } + + h = h_obs$height + + max_height = max(h) + + message(sprintf("Max Rand Heights(h): %s", paste(max_rand_heights, sep=",", collapse=","))) + + max_rand_height_dens = density(max_rand_heights) + plot(max_rand_height_dens, xlim=range(max_rand_height_dens$x, max_height)) + + e = ecdf(max_rand_heights) + message(sprintf("pvals(Lengths(h)): %s", paste(1-e(h), sep=",", collapse=","))) + + pval = 1- e(max_height) + message(sprintf("pval for max obs height: %g = %g", max_height, pval)) + + abline(v=max_height, col='red') + + pheatmap(t(expr.matrix), cluster_cols=F) + pheatmap(example_rand_matrix, cluster_cols=F) + + + #stop("stopping") + + if (max_height > 0 & pval <= MAX_PVAL) { + ## keep on cutting. + cut_height = mean(c(h[length(h)-1], h[length(h)])) + message(sprintf("cutting at height: %g", cut_height)) + grps = cutree(h_obs, h=cut_height) + print(grps) + uniqgrps = unique(grps) + for (grp in uniqgrps) { + grp_idx = which(grps==grp) + + message(sprintf("grp: %s contains idx: %s", grp, paste(grp_idx,sep=",", collapse=","))) + df = expr.matrix[,grp_idx,drop=F] + recursive_cluster_cutting(df) + } + } else { + message("No cluster pruning") + ALL_CLUSTERS[[length(ALL_CLUSTERS)+1]] <<- colnames(expr.matrix) + } + +} + +recursive_cluster_cutting(tumor.expr.data) + +dev.off() + +print(ALL_CLUSTERS) + + + + + + + + diff --git a/scripts/run.stub.R b/scripts/run.stub.R new file mode 100755 index 00000000..943cdfd0 --- /dev/null +++ b/scripts/run.stub.R @@ -0,0 +1,18 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(futile.logger) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +pdf('ladeda.pdf') + diff --git 
a/scripts/run_BayesNet.R b/scripts/run_BayesNet.R new file mode 100755 index 00000000..2a9e4c2a --- /dev/null +++ b/scripts/run_BayesNet.R @@ -0,0 +1,33 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--prelim_infercnv_obj", help="preliminary infercnv_obj file", required=TRUE, nargs=1) +parser$add_argument("--i6HMM_infercnv_obj", help="i6HMM infercnv_obj file", required=TRUE, nargs=1) + +parser$add_argument("--BayesMaxPNormal", help="BayesMaxPNormal", required=TRUE, nargs=1, type='double') +parser$add_argument("--out_dir", help="output directory", required=TRUE, nargs=1) + +args = parser$parse_args() + +library(infercnv) +library(futile.logger) + +infercnv_obj_prelim = readRDS(args$prelim_infercnv_obj) + +hmm.infercnv_obj = readRDS(args$i6HMM_infercnv_obj) + + +flog.info("Running Bayesian Network Model on HMM predicted CNV's\n") + +hmm.infercnv_obj <- infercnv::inferCNVBayesNet(infercnv_obj = infercnv_obj_prelim, + HMM_obj = hmm.infercnv_obj, + BayesMaxPNormal = args$BayesMaxPNormal, + file_dir = args$out_dir, + postMcmcMethod = "removeCNV", + out_dir = file.path(args$out_dir, "BayesNetOutput"), + quietly = TRUE) + + + diff --git a/scripts/run_HMM_each_cell_separately.R b/scripts/run_HMM_each_cell_separately.R new file mode 100755 index 00000000..0e703129 --- /dev/null +++ b/scripts/run_HMM_each_cell_separately.R @@ -0,0 +1,23 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(futile.logger) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + + +infercnv_obj.hmm = infercnv:::predict_CNV_via_HMM_on_indiv_cells(infercnv_obj) + +saveRDS(infercnv_obj.hmm, file=sprintf("%s-HMM-icells.obj", infercnv_obj_file)) + 
+plot_cnv(infercnv_obj.hmm, output_filename=paste0(infercnv_obj_file, "-HMM-icells")) + diff --git a/scripts/run_HMM_on_hspike.R b/scripts/run_HMM_on_hspike.R new file mode 100755 index 00000000..14b705d7 --- /dev/null +++ b/scripts/run_HMM_on_hspike.R @@ -0,0 +1,27 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(futile.logger) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +hspike = infercnv_obj@.hspike + +hspike.hmm = infercnv:::predict_CNV_via_HMM_on_tumor_subclusters(infercnv_obj=hspike, + cnv_mean_sd=infercnv:::get_spike_dists(hspike), + cnv_level_to_mean_sd_fit=infercnv:::get_hspike_cnv_mean_sd_trend_by_num_cells_fit(hspike) + ) + +plot_cnv(hspike.hmm, x.center=3, x.range=c(0,6), output_filename=paste0(basename(infercnv_obj_file), ".hspike.hmm"), out_dir=dirname(infercnv_obj_file)) + +saveRDS(hspike.hmm, file=sprintf("%s-HMM.obj", infercnv_obj_file)) + diff --git a/scripts/run_HMM_on_subclusters.R b/scripts/run_HMM_on_subclusters.R new file mode 100755 index 00000000..1eea2b79 --- /dev/null +++ b/scripts/run_HMM_on_subclusters.R @@ -0,0 +1,29 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(futile.logger) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + + +if (length(infercnv_obj@tumor_subclusters) == 0) { + flog.info("Computing tumor subclusters") + infercnv_obj <- infercnv:::.subcluster_tumors_general(infercnv_obj) +} + + +infercnv_obj.hmm = infercnv:::predict_CNV_via_HMM_on_tumor_subclusters(infercnv_obj) + 
+saveRDS(infercnv_obj.hmm, file=sprintf("%s-HMM.obj", infercnv_obj_file)) + +plot_cnv(infercnv_obj.hmm, output_filename=paste0(infercnv_obj_file, "-HMM")) + diff --git a/scripts/run_HMM_per_chr.R b/scripts/run_HMM_per_chr.R new file mode 100755 index 00000000..bdf74437 --- /dev/null +++ b/scripts/run_HMM_per_chr.R @@ -0,0 +1,23 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) + +parser = ArgumentParser() +parser$add_argument("--infercnv_obj", help="infercnv_obj file", required=TRUE, nargs=1) +args = parser$parse_args() + +library(infercnv) +library(ggplot2) +library(futile.logger) + +infercnv_obj_file = args$infercnv_obj + +infercnv_obj = readRDS(infercnv_obj_file) + +pdf('ladeda.pdf') +infercnv_obj.hmm = infercnv:::predict_CNV_via_HMM_each_chr_separately(infercnv_obj) + +saveRDS(infercnv_obj.hmm, file=sprintf("%s-HMM.obj", infercnv_obj_file)) + +plot_cnv(infercnv_obj.hmm, output_filename=paste0(infercnv_obj_file, "-HMM")) + diff --git a/scripts/sim_vs_orig_counts.QQplot.R b/scripts/sim_vs_orig_counts.QQplot.R new file mode 100755 index 00000000..9711650a --- /dev/null +++ b/scripts/sim_vs_orig_counts.QQplot.R @@ -0,0 +1,99 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) +library(infercnv) +library(tidyverse) + + +parser = ArgumentParser() +parser$add_argument("--counts_matrix", help="raw counts matrix file", required=TRUE, nargs=1) +parser$add_argument("--sim_method", help="simulation method: splatter, simple, meanvar", required=TRUE) +parser$add_argument("--include_dropout", default=FALSE, action='store_true', help='include dropout modeling') +args = parser$parse_args() + + +include.dropout = args$include_dropout + + +data = read.table(args$counts_matrix) +data = as.matrix(data) + +orig.counts = data + +if (! 
any(args$sim_method %in% c('splatter', 'simple', 'meanvar'))) { + stop(sprintf("Error, not recognizing sim method: %s", args$sim_method)) +} + + +#' normalize first: +cs = colSums(data) +median_cs = median(cs) +data <- sweep(data, STATS=cs, MARGIN=2, FUN="/") +data <- data * median_cs + +gene_means <- rowMeans(data) + +num_cells = ncol(data) + +## sim the tumor matrix +sim_method = args$sim_method +if (sim_method == 'simple') { + message('-using simple sim') + + mean_p0_table <- NULL + if (include.dropout) { + mean_p0_table <- infercnv:::.get_mean_vs_p0_from_matrix(data) + } + + sim_matrix <- infercnv:::.get_simulated_cell_matrix(gene_means, + mean_p0_table=mean_p0_table, + num_cells=num_cells, + common_dispersion=0.1) +} else if (sim_method == 'splatter') { + message('-using splatter sim') + + params <- infercnv:::.estimateSingleCellParamsSplatterScrape(orig.counts) + + params[['nCells']] <- num_cells + params[['include.dropout']] <- include.dropout + + gene_means[gene_means == 0] <- 1e-3 + sim_matrix <- infercnv:::.simulateSingleCellCountsMatrixSplatterScrape(params, gene_means) + sim_matrix <- counts(sim_matrix) + +} else if (sim_method == 'meanvar') { + message('-using meanvar sim') + ##tumor_sim_matrix <- infercnv:::.get_simulated_cell_matrix_using_meanvar_trend_given_normal_matrix(gene_means, data, args$num_tumor_cells) + sim_matrix <- infercnv:::.get_simulated_cell_matrix_using_meanvar_trend_given_normal_matrix(gene_means, data, num_cells, include.dropout=include.dropout) + +} else { + stop(sprintf("not recognizing --sim_method: %s", args$sim_method)) +} + + +## Plotting +if (include.dropout) { + sim_method <- sprintf("%s-With_Dropout", sim_method) +} else { + sim_method <- sprintf("%s-NO_Dropout", sim_method) +} + +rownames(sim_matrix) <- names(gene_means) +colnames(sim_matrix) <- colnames(data) +sim_matrix_filename <- sprintf("sim.%s.counts.matrix", sim_method) +message("-writing matrix") +write.table(sim_matrix, sim_matrix_filename, quote=F, sep="\t") + 
+## total sum normalize sim matrix before plotting +sim_matrix <- infercnv:::.normalize_data_matrix_by_seq_depth(sim_matrix, median_cs) + +message("-plotting QQ plot") +png(sprintf("sim_vs_orig_counts.%s.qqplots.png", sim_method)) +qqplot(log(as.numeric(data)+1), log(as.numeric(sim_matrix)+1), main='orig vs. full sim') +abline(a=0,b=1,col='red') + +message("-plotting KS plot") +png(sprintf("sim_vs_orig_counts.%s.KS.png", sim_method)) +infercnv:::KS_plot(sprintf("KS, %s", sim_method), log(as.numeric(data)+1), log(as.numeric(sim_matrix)+1), names=c('orig', sim_method)) + + diff --git a/scripts/splatterScrape_sim_counts.R b/scripts/splatterScrape_sim_counts.R new file mode 100755 index 00000000..6998b20f --- /dev/null +++ b/scripts/splatterScrape_sim_counts.R @@ -0,0 +1,67 @@ +#!/usr/bin/env Rscript + +suppressPackageStartupMessages(library("argparse")) +options(error = function() {traceback(2);quit(save = "no", status = 0, runLast = FALSE)}) + +parser = ArgumentParser() + +parser$add_argument("--counts_matrix", help="raw counts matrix file", required=TRUE, default=NULL, nargs=1) +parser$add_argument("--ncells", help="number of cells to simulate", required=TRUE, type='integer', nargs=1) +parser$add_argument("--ngenes", help="number of genes to simulate", required=TRUE, type='integer', nargs=1) +parser$add_argument("--output", help='name of output matrix file', required=TRUE, nargs=1) + +args = parser$parse_args() + +library(infercnv) +library(SingleCellExperiment) +library("methods") +library(splatter) + + +counts_matrix = read.table(args$counts_matrix) +params_file = sprintf("%s.params_obj", args$counts_matrix) +if (file.exists(params_file)) { + message("-note, reusing stored params") + params = readRDS(params_file) +} else { + params <- infercnv:::.estimateSingleCellParamsSplatterScrape(counts_matrix) + saveRDS(params, file=sprintf("%s.params_obj", args$counts_matrix)) +} + +ncells = args$ncells +ngenes = args$ngenes +output_filename = args$output + +data = 
as.matrix(counts_matrix) + +#' normalize first: +cs = colSums(counts_matrix) +median_cs = median(cs) +data <- sweep(counts_matrix, STATS=cs, MARGIN=2, FUN="/") +data <- data * median_cs + +## sim using specified gene means +gene_means = rowMeans(data) +gene_means = gene_means[gene_means>0] + +gene_means = sample(x=gene_means, size=ngenes, replace=T) + +newnames = paste0('gene', 1:ngenes) + +names(gene_means) = newnames + + +params[['nGenes']] = ngenes +params[['nCells']] = ncells + + +sim_matrix <- infercnv:::.simulateSingleCellCountsMatrixSplatterScrape(params, gene_means) +sim_matrix <- counts(sim_matrix) + +write.table(sim_matrix, file=output_filename, quote=F, sep='\t') + + + + + + diff --git a/tests/testthat/test_infer_cnv.R b/tests/testthat/test_infer_cnv.R index 5bea8019..0b33fc77 100755 --- a/tests/testthat/test_infer_cnv.R +++ b/tests/testthat/test_infer_cnv.R @@ -1,6 +1,7 @@ # Global data -make_fake_infercnv_obj <- function(some_matrix) { + +make_fake_infercnv_obj_midpoint_ref <- function(some_matrix) { num_cells = ncol(some_matrix) num_genes = nrow(some_matrix) @@ -14,7 +15,7 @@ make_fake_infercnv_obj <- function(some_matrix) { stop=1:num_genes) midpt_cells = floor(num_cells/2) - + normal_cells = 1:midpt_cells tumor_cells = (midpt_cells+1):num_cells @@ -30,6 +31,40 @@ make_fake_infercnv_obj <- function(some_matrix) { } +make_fake_infercnv_obj <- function(some_matrix, ref_idx, ref_names="a") { + + num_cells = ncol(some_matrix) + num_genes = nrow(some_matrix) + + if (num_cells < 2) { + stop("Error, need at least 2 cells in the matrix") + } + + gene_order <- data.frame(chr=rep("chr1", num_genes), + start=1:num_genes, + stop=1:num_genes) + + #normal_cells = ref_idx + tumor_cells = c(1:num_cells)[-unlist(ref_idx)] + + ref_group_cell_indices = list() + for (i in 1:length(ref_names)) { + ref_group_cell_indices[[ ref_names[i] ]] <- unlist(ref_idx[i]) + } + + infercnv_obj <- new( + Class = "infercnv", + expr.data = some_matrix, + count.data = some_matrix, + 
gene_order = gene_order, + #reference_grouped_cell_indices = list(normal=normal_cells), + reference_grouped_cell_indices = ref_group_cell_indices, + observation_grouped_cell_indices = list(tumor=tumor_cells) ) + + return(infercnv_obj) + +} + matrix_zeros <- matrix(rep(0,5), ncol=1) @@ -78,38 +113,42 @@ matrix_averef_five_answer <- matrix(c(c(-1,0,0,0,0,-1,0,0,1,0), ncol=10, byrow=TRUE) + +test1_in = make_fake_infercnv_obj(t(matrix_one), list(c(1))) +test1_out = infercnv::subtract_ref_expr_from_obs(test1_in) test_that("subtract_ref works with one observation, one reference",{ - expect_equal(infercnv:::.subtract_expr(t(matrix_one), - ref_groups=list(c(1))), - t(avref_answer_1)) - }) + expect_equal(test1_out@expr.data, t(avref_answer_1)) +}) + +test2_in = make_fake_infercnv_obj(t(matrix_two), list(c(1))) +test2_out = infercnv::subtract_ref_expr_from_obs(test2_in) test_that("subtract_ref works with two observations, one reference",{ - expect_equal(infercnv:::.subtract_expr(t(matrix_two), - ref_groups=list(c(1))), - t(avref_answer_2)) - }) + expect_equal(test2_out@expr.data, t(avref_answer_2)) +}) +test3_in = make_fake_infercnv_obj(t(matrix_three), list(c(1, 3))) +test3_out = infercnv::subtract_ref_expr_from_obs(test3_in) test_that("subtract_ref updated works with 3 observaions, two reference",{ - expect_equal(infercnv:::.subtract_expr(t(matrix_three), - ref_groups=list(c(1,3))), - t(avref_answer_3)) - }) + expect_equal(test3_out@expr.data, t(avref_answer_3)) +}) + +test4_in = make_fake_infercnv_obj(t(matrix_five), list(c(2, 5))) +test4_out = infercnv::subtract_ref_expr_from_obs(test4_in) test_that("subtract_ref works with 5 observations, two reference",{ - expect_equal(infercnv:::.subtract_expr(t(matrix_five), - ref_groups=list(c(2,5))), - t(avref_answer_4)) - }) + expect_equal(test4_out@expr.data, t(avref_answer_4)) +}) + +test5_in = make_fake_infercnv_obj(t(matrix_zeros), list(c(1))) +test5_out = infercnv::subtract_ref_expr_from_obs(test5_in) test_that("subtract_ref 
works with 1 observation, 1 reference",{ - expect_equal(infercnv:::.subtract_expr(t(matrix_zeros), - ref_groups=list(c(1))), - t(avref_answer_5)) + expect_equal(test5_out@expr.data, t(avref_answer_5)) }) +test6_in = make_fake_infercnv_obj(t(matrix_averef_five), list(c(2),c(4,6,8),c(10)), ref_names=c("a", "b", "c")) +test6_out = infercnv::subtract_ref_expr_from_obs(test6_in, use_bounds=TRUE) test_that("subtract_ref works with 10 obs, 5 references, 3 groups",{ - expect_equal(infercnv:::.subtract_expr(t(matrix_averef_five), - ref_groups=list(c(2),c(4,6,8),c(10))), - matrix_averef_five_answer) - }) + expect_equal(test6_out@expr.data, matrix_averef_five_answer) +}) diff --git a/vignettes/inferCNV.Rmd b/vignettes/inferCNV.Rmd old mode 100755 new mode 100644 index 6f5d631b..e857b996 --- a/vignettes/inferCNV.Rmd +++ b/vignettes/inferCNV.Rmd @@ -16,7 +16,7 @@ author: date: "`r Sys.Date()`" output: BiocStyle::html_document: default -package: inferCNV +package: infercnv abstract: > InferCNV is used to explore tumor single cell RNA-Seq data to identify evidence for large-scale chromosomal copy number variations, such as gains or deletions of entire chromosomes or large segments of chromosomes. This is done by exploring expression intensity of genes across positions of the genome in comparison to the average or a set of reference 'normal' cells. A heatmap is generated illustrating the relative expression intensities across each chromosome, and it becomes readily apparent as to which regions of the genome are over-abundant or less-abundant as compared to normal cells (or the average, if reference normal cells are not provided). vignette: > @@ -29,15 +29,32 @@ vignette: > # Installation ## Required dependencies -_inferCNV_ uses the _R_ packages `r CRANpkg("ape")`, `r CRANpkg("binhf")`, `r CRANpkg("fastcluster")`, `r CRANpkg("futile.logger")`, `r CRANpkg("RColorBrewer")`, `r CRANpkg("Seurat")` and imports functions from the archived `r CRANpkg("GMD")`. 
+_inferCNV_ uses the _R_ packages `r CRANpkg("ape")`, `r Biocpkg("BiocGenerics")`, `r CRANpkg("binhf")`, `r CRANpkg("caTools")`, `r CRANpkg("coda")`, `r CRANpkg("coin")`, `r CRANpkg("dplyr")`, `r CRANpkg("doParallel")`, `r Biocpkg("edgeR")`, `r CRANpkg("fastcluster")`, `r CRANpkg("fitdistrplus")`, `r CRANpkg("foreach")`, `r CRANpkg("futile.logger")`, `r CRANpkg("future")`, `r CRANpkg("gplots")`, `r CRANpkg("ggplot2")`, `r CRANpkg("HiddenMarkov")`, `r CRANpkg("reshape")`, `r CRANpkg("rjags")`, `r CRANpkg("RColorBrewer")`, `r Biocpkg("SingleCellExperiment")`, `r Biocpkg("SummarizedExperiment")` and imports functions from the archived `r CRANpkg("GMD")`. @@ -50,17 +67,15 @@ biocLite("inferCNV") ``` ## Optional dependencies -If you want to use the interactive heatmap visualization, please also install the _R_ packages `r CRANpkg("dplyr")`, `r CRANpkg("tibble")`, `r Githubpkg("bmbroom/tsvio")` and `r Githubpkg("bmbroom/NGCHMR")`. To install optional packages, type the following in an R command window: +If you want to use the interactive heatmap visualization, please also install the _R_ packages `r CRANpkg("tibble")`, `r Githubpkg("bmbroom/tsvio")` and `r Githubpkg("bmbroom/NGCHMR")`.
To install optional packages, type the following in an R command window: ```{r install-optionals, eval = FALSE} -install.packages("dplyr") install.packages("tibble") install.packages("devtools") @@ -89,15 +104,45 @@ Reading in the raw counts matrix and meta data, populating the infercnv object ```{r} infercnv_obj = CreateInfercnvObject( - raw_counts_matrix="../example/oligodendroglioma_expression_downsampled.counts.matrix", - annotations_file="../example/oligodendroglioma_annotations_downsampled.txt", + raw_counts_matrix="../inst/extdata/oligodendroglioma_expression_downsampled.counts.matrix.gz", + annotations_file="../inst/extdata/oligodendroglioma_annotations_downsampled.txt", delim="\t", - gene_order_file="../example/gencode_downsampled.txt", + gene_order_file="../inst/extdata/gencode_downsampled.EXAMPLE_ONLY_DONT_REUSE.txt", ref_group_names=c("Microglia/Macrophage","Oligodendrocytes (non-malignant)")) ``` + +## Running the full default analysis +```{r, results="hide"} +infercnv_obj_default = infercnv::run( + infercnv_obj, + cutoff=1, # cutoff=1 works well for Smart-seq2, and cutoff=0.1 works well for 10x Genomics + out_dir="../example_output", + cluster_by_groups=TRUE, + plot_steps=FALSE, + denoise=TRUE, + HMM=TRUE, + no_prelim_plot=TRUE, + png_res=60, + num_threads=2, + BayesMaxPNormal=0 +) + +``` + +Basic output from running inferCNV.
+```{r, echo=FALSE} +knitr::include_graphics("../example_output/infercnv.png") +``` + +HMM predictions +```{r, echo=FALSE} +knitr::include_graphics("../example_output/infercnv.13_HMM_predHMMi6.hmm_mode-samples.repr_intensities.png") +``` + + ## Filtering genes Removing those genes that are very lowly expressed or present in very few cells @@ -114,29 +159,15 @@ infercnv_obj <- require_above_min_cells_ref(infercnv_obj, min_cells_per_gene=min ## for safe keeping infercnv_orig_filtered = infercnv_obj - -#plot_mean_chr_expr_lineplot(infercnv_obj) -save('infercnv_obj', file = '../example_output/infercnv_obj.orig_filtered') - -``` - - -## Normalize each cell's counts for sequencing depth - -```{r} -infercnv_obj <- infercnv:::normalize_counts_by_seq_depth(infercnv_obj) ``` -## Perform Anscombe normalization -Suggested by Matan for removing noisy variation at low counts +## Normalize each cell's counts for sequencing depth ```{r} -infercnv_obj <- infercnv:::anscombe_transform(infercnv_obj) +infercnv_obj <- infercnv::normalize_counts_by_seq_depth(infercnv_obj) ``` - ## Log transform the normalized counts: @@ -153,37 +184,19 @@ threshold = mean(abs(get_average_bounds(infercnv_obj))) infercnv_obj <- apply_max_threshold_bounds(infercnv_obj, threshold=threshold) ``` -## Initial view, before inferCNV operations: - -```{r, results="hide"} -plot_cnv(infercnv_obj, - out_dir='../example_output/', - output_filename='infercnv.logtransf', - x.range="auto", - title = "Before InferCNV (filtered & log2 transformed)", - color_safe_pal = FALSE, - x.center = mean(infercnv_obj@expr.data)) -``` - - -```{r, echo=FALSE} -knitr::include_graphics("../example_output/infercnv.logtransf.png") -``` - - ## Perform smoothing across chromosomes ```{r} infercnv_obj = smooth_by_chromosome(infercnv_obj, window_length=101, smooth_ends=TRUE) ``` - + ```{r} # re-center each cell infercnv_obj <- center_cell_expr_across_chromosome(infercnv_obj, method = "median") ``` - + ```{r, results='hide' } @@ -192,6 +205,7 @@
plot_cnv(infercnv_obj, output_filename='infercnv.chr_smoothed', x.range="auto", title = "chr smoothed and cells re-centered", + png_res=60, color_safe_pal = FALSE) ``` @@ -216,7 +230,8 @@ plot_cnv(infercnv_obj, out_dir='../example_output/', output_filename='infercnv.ref_subtracted', x.range="auto", - title="ref subtracted", + title="ref subtracted", + png_res=60, color_safe_pal = FALSE) ``` @@ -235,24 +250,6 @@ This is important because we want (1/2)x to be symmetrical to 1.5x, representing infercnv_obj <- invert_log2(infercnv_obj) ``` - - - -```{r, results="hide"} -plot_cnv(infercnv_obj, - out_dir='../example_output/', - output_filename='infercnv.inverted', - color_safe_pal = FALSE, - x.range="auto", - x.center=1, - title = "inverted log FC to FC") -``` - - -```{r, echo=FALSE} -knitr::include_graphics("../example_output/infercnv.inverted.png") -``` ## Removing noise @@ -270,7 +267,8 @@ plot_cnv(infercnv_obj, output_filename='infercnv.denoised', x.range="auto", x.center=1, - title="denoised", + title="denoised", + png_res=60, color_safe_pal = FALSE) ``` @@ -285,60 +283,8 @@ This generally improves on the visualization ```{r} infercnv_obj = remove_outliers_norm(infercnv_obj) ``` - - - -```{r, results="hide"} - - plot_cnv(infercnv_obj, - out_dir='../example_output/', - output_filename='infercnv.outliers_removed', - color_safe_pal = FALSE, - x.range="auto", - x.center=1, - title = "outliers removed") -``` - - -```{r, echo=FALSE} -knitr::include_graphics("../example_output/infercnv.outliers_removed.png") -``` - - - -## Find DE genes by comparing the mutant types to normal types, BASIC - -Runs a t-Test comparing tumor/normal for each patient and normal sample, and masks out those genes that are not significantly DE. 
- -```{r} - -plot_data = infercnv_obj@expr.data -high_threshold = max(abs(quantile(plot_data[plot_data != 0], c(0.05, 0.95)))) - -low_threshold = -1 * high_threshold - -infercnv_obj2 <- infercnv:::mask_non_DE_genes_basic(infercnv_obj, test.use = 't', center_val=1) - -``` -```{r, results="hide"} -plot_cnv(infercnv_obj2, - out_dir='../example_output/', - output_filename='infercnv.non-DE-genes-masked', - color_safe_pal = FALSE, - x.range=c(low_threshold, high_threshold), - x.center=1, - title = "non-DE-genes-masked") -``` - - -```{r, echo=FALSE} -knitr::include_graphics("../example_output/infercnv.non-DE-genes-masked.png") - -``` - # Additional Information ## Online Documentation