man/BoostClassifier.Rd

% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/doubletdetection.R
\docType{class}
\name{BoostClassifier-class}
\alias{BoostClassifier-class}
\alias{BoostClassifier}
\title{Classifier for doublets in single-cell RNA-seq data}
\usage{
clf <- BoostClassifier$new()
# raw_counts is a genes by cells count matrix
labels <- clf$fit(raw_counts)$predict()
# returns a vector with 1 for doublet and 0 for singlet
}
\arguments{
\item{raw_counts}{(matrix): Count matrix, oriented genes by cells. Can be sparse matrix or data.frame input.}

\item{p_thresh}{(numeric, optional): hypergeometric test p-value threshold that determines per iteration doublet calls}

\item{voter_thresh}{(numeric, optional): fraction of iterations in which a cell must be called a doublet}

\item{cell1, cell2}{(vector, numeric): Gene count vectors.}
}
\section{Fields}{

\describe{
\item{\code{boost_rate}}{(numeric, optional): Proportion of cell population size to produce as synthetic doublets.}

\item{\code{n_components}}{(integer, optional): Number of principal components used for clustering.}

\item{\code{n_top_var_genes}}{(integer, optional): Number of highest variance genes to use; other genes discarded. Will use all genes when zero.}

\item{\code{new_lib_as}}{(function(integer vector) -> integer, optional): Method to use in choosing library size for synthetic doublets. Defaults to NULL, which makes each synthetic doublet the exact sum of its parents; an alternative is new_lib_as = max.}

\item{\code{replace}}{(logical, optional): If FALSE, a cell will be selected as a synthetic doublet's parent no more than once.}

\item{\code{phenograph_parameters}}{(list, optional): Parameter list to pass directly to Phenograph. Note that we change the Phenograph 'prune' default to TRUE; you must specifically include list(prune=FALSE) here to change this.}

\item{\code{n_iters}}{(integer, optional): Number of fit operations from which to collect p-values. Default value is 25.}

\item{\code{normalizer}}{((matrix) -> matrix): Method to normalize raw_counts. Defaults to normalize_counts, included in this package. Note: To use normalize_counts with its pseudocount parameter changed from the default 0.1 value to some positive numeric \code{new_var}, use: \code{normalizer = function(counts) normalize_counts(counts, pseudocount = new_var)}}

\item{\code{num_genes,num_cells}}{(numeric): number of genes and cells, rows and columns of raw_counts matrix respectively.}

\item{\code{rawsynthetics}}{(matrix): Count matrix, oriented genes by cells. Synthetic doublets generated.}

\item{\code{parents_}}{(list): Pairs of column indices for cells sampled to generate synthetic doublets.}
}}

\section{Methods}{

\describe{
\item{\code{fit(raw_counts)}}{Fits the classifier on raw_counts.}

\item{\code{initialize(boost_rate = 0.25, n_components = 30L,
  n_top_var_genes = 10000L, new_lib_as = NULL, new_lib_as_fun = NULL,
  replace = FALSE, phenograph_parameters = list(prune = TRUE),
  n_iters = 25L, normalizer = normalize_counts)}}{This method is called when you create an instance of the class.}

\item{\code{predict(p_thresh = 0.01, voter_thresh = 0.9)}}{Produce doublet calls from fitted classifier.}
}}