forked from JonathanShor/DoubletDetection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
BoostClassifier.Rd
61 lines (42 loc) · 3.46 KB
/
BoostClassifier.Rd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/doubletdetection.R
\docType{class}
\name{BoostClassifier-class}
\alias{BoostClassifier-class}
\alias{BoostClassifier}
\title{Classifier for doublets in single-cell RNA-seq data}
\usage{
clf <- BoostClassifier$new()
# raw_counts is a genes by cells count matrix
labels <- clf$fit(raw_counts)$predict()
# returns a vector with 1 for each doublet and 0 for each singlet
}
\arguments{
\item{raw_counts}{(matrix): Count matrix, oriented genes by cells. Can be sparse matrix or data.frame input.}
\item{p_thresh}{(numeric, optional): hypergeometric test p-value threshold that determines per iteration doublet calls}
\item{voter_thresh}{(numeric, optional): fraction of iterations in which a cell must be called a doublet}
\item{cell1, cell2}{(vector, numeric): Gene count vectors.}
}
\section{Fields}{
\describe{
\item{\code{boost_rate}}{(numeric, optional): Proportion of cell population size to produce as synthetic doublets.}
\item{\code{n_components}}{(integer, optional): Number of principal components used for clustering.}
\item{\code{n_top_var_genes}}{(integer, optional): Number of highest variance genes to use; other genes discarded. Will use all genes when zero.}
\item{\code{new_lib_as}}{(function(integer vector) -> integer, optional): Method used to choose the library size for synthetic doublets. Defaults to NULL, which makes each synthetic doublet the exact sum of its parents; an alternative is \code{new_lib_as = max}.}
\item{\code{replace}}{(logical, optional): If FALSE, a cell will be selected as a synthetic doublet's parent no more than once.}
\item{\code{phenograph_parameters}}{(list, optional): Parameter list to pass directly to Phenograph. Note that we change the Phenograph 'prune' default to TRUE; you must specifically include list(prune=FALSE) here to change this.}
\item{\code{n_iters}}{(integer, optional): Number of fit operations from which to collect p-values. Default value is 25.}
\item{\code{normalizer}}{((matrix) -> matrix): Method to normalize raw_counts. Defaults to normalize_counts, included in this package. Note: To use normalize_counts with its pseudocount parameter changed from the default 0.1 value to some positive numeric \code{new_var}, use: \code{normalizer = function(counts) normalize_counts(counts, pseudocount = new_var)}}
\item{\code{num_genes, num_cells}}{(numeric): Number of genes and cells, i.e. the number of rows and columns of the raw_counts matrix, respectively.}
\item{\code{rawsynthetics}}{(matrix): Count matrix, oriented genes by cells. Synthetic doublets generated.}
\item{\code{parents_}}{(list): Pairs of column indices for cells sampled to generate synthetic doublets.}
}}
\section{Methods}{
\describe{
\item{\code{fit(raw_counts)}}{Fits the classifier on raw_counts.}
\item{\code{initialize(boost_rate = 0.25, n_components = 30L,
n_top_var_genes = 10000L, new_lib_as = NULL, new_lib_as_fun = NULL,
replace = FALSE, phenograph_parameters = list(prune = TRUE),
n_iters = 25L, normalizer = normalize_counts)}}{This method is called when you create an instance of the class.}
\item{\code{predict(p_thresh = 0.01, voter_thresh = 0.9)}}{Produce doublet calls from fitted classifier.}
}}