Skip to content

Commit

Permalink
add names of three new function to NAMESPACE and three Rd files in ma…
Browse files Browse the repository at this point in the history
…n folder

Former-commit-id: c511bbc
  • Loading branch information
Jiangyuan-Liu committed Dec 7, 2017
1 parent 68f986e commit 7452d26
Show file tree
Hide file tree
Showing 100 changed files with 416 additions and 154 deletions.
Empty file modified CONDUCT.md
100644 → 100755
Empty file.
Empty file modified LICENSE
100644 → 100755
Empty file.
Empty file modified LICENSE.pheatmap
100644 → 100755
Empty file.
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ S3method(simulateCells,celda_G)
S3method(visualizeModelPerformance,celda_C)
S3method(visualizeModelPerformance,celda_CG)
S3method(visualizeModelPerformance,celda_G)
export(GiniPlot)
export(available_models)
export(calculateLoglikFromVariables)
export(calculatePerplexity)
Expand All @@ -37,6 +38,7 @@ export(clusterProbability.celda_G)
export(compareCountMatrix)
export(completeClusterHistory)
export(completeLogLikelihood)
export(diffExp_MAST)
export(distinct_colors)
export(factorizeMatrix)
export(finalClusterAssignment)
Expand All @@ -56,8 +58,10 @@ export(renderInteractiveKLPlot)
export(renderIterationLikelihoodPlot)
export(runParams)
export(seed)
export(selectModels)
export(semi_pheatmap)
export(simulateCells)
export(stateHeatmap)
export(topRank)
export(visualizeModelPerformance)
import(RColorBrewer)
Expand Down
Empty file modified R/StateHeatmap.R
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion R/celda.R
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ available_models = c("celda_C", "celda_G", "celda_CG")
#' @return Object of class "celda_list", which contains results for all model parameter combinations and summaries of the run parameters
#' @import foreach
#' @export
celda = function(counts, model, sample.label=NULL, nchains=1, cores=1, seed=12345, verbose=TRUE, logfile_prefix="Celda", ...) {
celda = function(counts, model, sample.label=NULL, nchains=1, cores=1, seed=12345, verbose=FALSE, logfile_prefix="Celda", ...) {
message("Starting celda...")
validateArgs(counts, model, sample.label, nchains, cores, seed, ...)

Expand Down
16 changes: 13 additions & 3 deletions R/celda_C.R
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -204,18 +204,18 @@ celda_C = function(counts, sample.label=NULL, K, alpha=1, beta=1,
iter = iter + 1
}

reordered.labels = reorder.label.by.size(z.best, K)
z.final.reorder = reordered.labels$new.labels
names = list(row=rownames(counts), column=colnames(counts), sample=levels(sample.label))

result = list(z=z.final.reorder, completeLogLik=ll,
result = list(z=z.best, completeLogLik=ll,
finalLogLik=ll.best, seed=seed, K=K,
sample.label=sample.label, alpha=alpha,
beta=beta, count.checksum=count.checksum,
names=names)

class(result) = "celda_C"

result = reorder.celdaC(counts = counts, res = result)

return(result)
}

Expand Down Expand Up @@ -351,6 +351,16 @@ cC.calcLL = function(m.CP.by.S, n.CP.by.G, s, z, K, nS, alpha, beta) {
return(final)
}

reorder.celdaC = function(counts,res){
#Reorder K
fm <- factorizeMatrix(counts = counts, celda.mod = res)
fm.norm <- t(normalizeCounts(t(fm$proportions$gene.states),scale.factor = 1))
d <- dist(t(fm.norm),diag = TRUE, upper = TRUE)
h <- hclust(d, method = "complete")
res <- recodeClusterZ(res,from = h$order,
to = c(1:ncol(fm$counts$gene.states)))
return(res)
}

#' Generate factorized matrices showing each feature's influence on the celda_C model clustering
#'
Expand Down
49 changes: 34 additions & 15 deletions R/celda_CG.R
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ calculateLoglikFromVariables.celda_CG = function(counts, s, z, y, K, L, alpha, b
return(final)
}

cCG.calcLL = function(K, L, m.CP.by.S, n.CP.by.TS, n.by.G, n.by.TS, nG.by.TS, nS, nG, alpha, beta, delta, gamma) {
.calcLL = function(K, L, m.CP.by.S, n.CP.by.TS, n.by.G, n.by.TS, nG.by.TS, nS, nG, alpha, beta, delta, gamma) {

## Determine if any TS has 0 genes
## Need to remove 0 gene states as this will cause the likelihood to fail
Expand Down Expand Up @@ -142,7 +142,7 @@ cCG.calcLL = function(K, L, m.CP.by.S, n.CP.by.TS, n.by.G, n.by.TS, nG.by.TS, nS
return(final)
}

cCG.calcGibbsProbZ = function(m.CP.by.S, n.CP.by.TS, n.CP, L, alpha, beta) {
.calcGibbsProbZ = function(m.CP.by.S, n.CP.by.TS, n.CP, L, alpha, beta) {

## Calculate for "Theta" component
theta.ll = log(m.CP.by.S + alpha)
Expand All @@ -158,7 +158,7 @@ cCG.calcGibbsProbZ = function(m.CP.by.S, n.CP.by.TS, n.CP, L, alpha, beta) {
return(final)
}

cCG.calcGibbsProbY = function(n.CP.by.TS, n.by.TS, nG.by.TS, nG.in.Y, beta, delta, gamma) {
.calcGibbsProbY = function(n.CP.by.TS, n.by.TS, nG.by.TS, nG.in.Y, beta, delta, gamma) {

## Calculate for "Phi" component
phi.ll = sum(lgamma(n.CP.by.TS + beta))
Expand All @@ -176,6 +176,25 @@ cCG.calcGibbsProbY = function(n.CP.by.TS, n.by.TS, nG.by.TS, nG.in.Y, beta, delt
return(final)
}

reorder.celdaCG = function(counts,res){
#Reorder K
fm <- factorizeMatrix(counts = counts, celda.mod = res)
fm.norm <- t(normalizeCounts(t(fm$proportions$population.states),scale.factor = 1))
d <- dist(t(fm.norm),diag = TRUE, upper = TRUE)
h <- hclust(d, method = "complete")
res <- recodeClusterZ(res,from = h$order,
to = c(1:ncol(fm$counts$population.states)))

#Reorder L
fm <- factorizeMatrix(counts = counts, celda.mod = res)
fm.norm <- t(normalizeCounts(t(fm$proportions$population.states),scale.factor = 1))
d <- dist((fm.norm),diag = TRUE, upper = TRUE)
h <- hclust(d, method = "complete")
res <- recodeClusterY(res,from = h$order,
to = c(1:nrow(fm$counts$population.states)))
return(res)
}

#' Simulate cells from the cell/gene clustering generative model
#'
#' @param S The number of samples
Expand Down Expand Up @@ -246,8 +265,8 @@ simulateCells.celda_CG = function(model, S=10, C.Range=c(50,100), N.Range=c(500,
zero.row.idx = which(rowSums(cell.counts) == 0)
if (length(zero.row.idx > 0)) {
cell.counts = cell.counts[-zero.row.idx, ]
}
new$y = new$y[-zero.row.idx]
new$y = new$y[-zero.row.idx]
}

## Assign gene/cell/sample names
rownames(cell.counts) = paste0("Gene_", 1:nrow(cell.counts))
Expand Down Expand Up @@ -332,7 +351,7 @@ celda_CG = function(counts, sample.label=NULL, K, L, alpha=1, beta=1,
nG = nrow(counts)
nM = ncol(counts)

ll = cCG.calcLL(K=K, L=L, m.CP.by.S=m.CP.by.S, n.CP.by.TS=n.CP.by.TS, n.by.G=n.by.G, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, nS=nS, nG=nG, alpha=alpha, beta=beta, delta=delta, gamma=gamma)
ll = .calcLL(K=K, L=L, m.CP.by.S=m.CP.by.S, n.CP.by.TS=n.CP.by.TS, n.by.G=n.by.G, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, nS=nS, nG=nG, alpha=alpha, beta=beta, delta=delta, gamma=gamma)

iter = 1
continue = TRUE
Expand All @@ -358,7 +377,7 @@ celda_CG = function(counts, sample.label=NULL, K, L, alpha=1, beta=1,
temp.n.CP = n.CP
temp.n.CP[j] = temp.n.CP[j] + sum(counts[,i])

probs[j] = cCG.calcGibbsProbZ(m.CP.by.S=m.CP.by.S[j,s[i]], n.CP.by.TS=temp.n.CP.by.TS, n.CP=temp.n.CP, L=L, alpha=alpha, beta=beta)
probs[j] = .calcGibbsProbZ(m.CP.by.S=m.CP.by.S[j,s[i]], n.CP.by.TS=temp.n.CP.by.TS, n.CP=temp.n.CP, L=L, alpha=alpha, beta=beta)
}

## Sample next state and add back counts
Expand Down Expand Up @@ -410,7 +429,7 @@ celda_CG = function(counts, sample.label=NULL, K, L, alpha=1, beta=1,
temp.nG.by.TS = nG.by.TS + (1 * ADD_PSEUDO)
temp.nG.by.TS[j] = temp.nG.by.TS[j] + 1

probs[j] = cCG.calcGibbsProbY(n.CP.by.TS=temp.n.CP.by.TS,
probs[j] = .calcGibbsProbY(n.CP.by.TS=temp.n.CP.by.TS,
n.by.TS=temp.n.by.TS,
nG.by.TS=temp.nG.by.TS,
nG.in.Y=temp.nG.by.TS[j],
Expand Down Expand Up @@ -481,7 +500,7 @@ celda_CG = function(counts, sample.label=NULL, K, L, alpha=1, beta=1,
}

## Calculate complete likelihood
temp.ll = cCG.calcLL(K=K, L=L, m.CP.by.S=m.CP.by.S, n.CP.by.TS=n.CP.by.TS, n.by.G=n.by.G, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, nS=nS, nG=nG, alpha=alpha, beta=beta, delta=delta, gamma=gamma)
temp.ll = .calcLL(K=K, L=L, m.CP.by.S=m.CP.by.S, n.CP.by.TS=n.CP.by.TS, n.by.G=n.by.G, n.by.TS=n.by.TS, nG.by.TS=nG.by.TS, nS=nS, nG=nG, alpha=alpha, beta=beta, delta=delta, gamma=gamma)
if((all(temp.ll > ll)) | iter == 1) {
z.best = z
y.best = y
Expand All @@ -493,21 +512,21 @@ celda_CG = function(counts, sample.label=NULL, K, L, alpha=1, beta=1,
iter = iter + 1
}

## Peform reordering on final Z and Y assigments:
reordered.labels = reorder.labels.by.size.then.counts(counts, z=z.best,
y=y.best, K=K, L=L)

names = list(row=rownames(counts), column=colnames(counts),
sample=levels(sample.label))


result = list(z=reordered.labels$z, y=reordered.labels$y, completeLogLik=ll,
result = list(z=z.best, y=y.best, completeLogLik=ll,
finalLogLik=ll.best, K=K, L=L, alpha=alpha,
beta=beta, delta=delta, gamma=gamma, seed=seed,
sample.label=sample.label, names=names,
count.checksum=count.checksum)

class(result) = "celda_CG"

## Peform reordering on final Z and Y assigments:
result = reorder.celdaCG(counts = counts, res = result)
return(result)
}

Expand Down Expand Up @@ -637,7 +656,7 @@ clusterProbability.celda_CG = function(counts, celda.mod) {
temp.n.CP = n.CP
temp.n.CP[j] = temp.n.CP[j] + sum(counts[,i])

z.prob[i,j] = cCG.calcGibbsProbZ(m.CP.by.S=m.CP.by.S[j,s[i]], n.CP.by.TS=temp.n.CP.by.TS, n.CP=temp.n.CP, L=L, alpha=alpha, beta=beta)
z.prob[i,j] = .calcGibbsProbZ(m.CP.by.S=m.CP.by.S[j,s[i]], n.CP.by.TS=temp.n.CP.by.TS, n.CP=temp.n.CP, L=L, alpha=alpha, beta=beta)
}

m.CP.by.S[z[i],s[i]] = m.CP.by.S[z[i],s[i]] + 1
Expand Down Expand Up @@ -667,7 +686,7 @@ clusterProbability.celda_CG = function(counts, celda.mod) {
temp.nG.by.TS = nG.by.TS + (1 * ADD_PSEUDO)
temp.nG.by.TS[j] = temp.nG.by.TS[j] + 1

y.prob[i,j] = cCG.calcGibbsProbY(n.CP.by.TS=temp.n.CP.by.TS,
y.prob[i,j] = .calcGibbsProbY(n.CP.by.TS=temp.n.CP.by.TS,
n.by.TS=temp.n.by.TS,
nG.by.TS=temp.nG.by.TS,
nG.in.Y=temp.nG.by.TS[j],
Expand Down
23 changes: 17 additions & 6 deletions R/celda_G.R
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,16 @@ cG.calcLL = function(n.TS.by.C, n.by.TS, n.by.G, nG.by.TS, nM, nG, L, beta, delt
return(final)
}


reorder.celdaG = function(counts,res){
#Reorder L
fm <- factorizeMatrix(counts = counts, celda.mod = res)
fm.norm <- t(normalizeCounts(fm$proportions$gene.states,scale.factor = 1))
d <- dist((fm.norm),diag = TRUE, upper = TRUE)
h <- hclust(d, method = "complete")
res <- recodeClusterY(res,from = h$order,
to = c(1:nrow(fm$counts$cell.states)))
return(res)
}


# Calculate Log Likelihood For Single Set of Cluster Assignments (Gene Clustering)
Expand Down Expand Up @@ -297,16 +306,16 @@ celda_G = function(counts, L, beta=1, delta=1, gamma=1, max.iter=25,
iter <- iter + 1
}

reordered.labels = reorder.label.by.size(y.best, L)
y.final.order = reordered.labels$new.labels
names = list(row=rownames(counts), column=colnames(counts))

result = list(y=y.final.order, completeLogLik=ll,
result = list(y=y.best, completeLogLik=ll,
finalLogLik=ll.best, L=L, beta=beta, delta=delta, gamma=gamma,
count.checksum=count.checksum, seed=seed, names=names)

class(result) = "celda_G"

result = reorder.celdaG(counts = counts, res = result)

return(result)
}

Expand Down Expand Up @@ -361,8 +370,10 @@ simulateCells.celda_G = function(model, C=100, N.Range=c(500,5000), G=1000,
## Ensure that there are no all-0 rows in the counts matrix, which violates a celda modeling
## constraint (columns are guarnteed at least one count):
zero.row.idx = which(rowSums(cell.counts) == 0)
cell.counts = cell.counts[-zero.row.idx, ]
y = y[-zero.row.idx]
if (length(zero.row.idx > 0)) {
cell.counts = cell.counts[-zero.row.idx, ]
y = y[-zero.row.idx]
}

rownames(cell.counts) = paste0("Gene_", 1:nrow(cell.counts))
colnames(cell.counts) = paste0("Cell_", 1:ncol(cell.counts))
Expand Down
Empty file modified R/celda_functions.R
100644 → 100755
Empty file.
Empty file modified R/celda_heatmap.R
100644 → 100755
Empty file.
47 changes: 36 additions & 11 deletions R/celda_list.R
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,26 @@
# S3 Methods for celda_list objects #
################################################################################

#' Run the celda Bayesian hierarchical model on a matrix of counts.
#' TODO: - If no chain provided, automatically choose best chain
#' - Smarter subsetting of the run.params to DRY this function up
# TODO: - If no chain provided, automatically choose best chain
# - Smarter subsetting of the run.params to DRY this function up

#' Select the best model amongst a list of models generated in a celda run.
#'
#' @param celda.list A celda_list object returned from celda()
#' @param K The K parameter for the desired model in the results list
#' @param L The L parameter for the desired model in the results list
#' @param chain The desired chain for the specified model
#' @param best Method for choosing best chain automatically. Options are c("perplexity", "loglik"). See documentation for chooseBestModel for details. Overrides chain parameter if provided.
#' @param K The K parameter for the desired model in the results list. Defaults to NULL.
#' @param L The L parameter for the desired model in the results list. Defaults to NULL.
#' @param chain The desired chain for the specified model. Defaults to NULL. Overrides best parameter if provided.
#' @param best Method for choosing best chain automatically. Options are c("perplexity", "loglik"). See documentation for chooseBestModel for details. Defaults to "loglik."
#' @return A celda model object matching the provided parameters (of class "celda_C", "celda_G", "celda_CG" accordingly), or NA if one is not found.
#' @export
getModel = function(celda.list, K=NULL, L=NULL, chain=1, best=NULL) {
getModel = function(celda.list, K=NULL, L=NULL, chain=NULL, best="loglik") {
validateGetModelParams(celda.list, K, L, chain, best) # Sanity check params

requested.chain = NA
run.params = celda.list$run.params

if (celda.list$content.type == "celda_CG") {
if (!is.null(best)) {
if (is.null(chain)) {
matching.chain.idx = run.params[run.params$K == K & run.params$L == L, "index"]
requested.chain = chooseBestChain(celda.list$res.list[matching.chain.idx], best)
} else {
Expand All @@ -32,7 +33,7 @@ getModel = function(celda.list, K=NULL, L=NULL, chain=1, best=NULL) {


if (celda.list$content.type == "celda_C") {
if (!is.null(best)) {
if (is.null(chain)) {
matching.chain.idx = run.params[run.params$K == K, "index"]
requested.chain = chooseBestChain(celda.list$res.list[matching.chain.idx], best)
} else {
Expand All @@ -43,7 +44,7 @@ getModel = function(celda.list, K=NULL, L=NULL, chain=1, best=NULL) {


if (celda.list$content.type == "celda_G") {
if (!is.null(best)) {
if (is.null(chain)) {
matching.chain.idx = run.params[run.params$L == L, "index"]
requested.chain = chooseBestChain(celda.list$res.list[matching.chain.idx], best)
} else {
Expand All @@ -62,6 +63,30 @@ getModel = function(celda.list, K=NULL, L=NULL, chain=1, best=NULL) {
}


#' Select a set of models from a celda_list objects based off of rows in its run.params attribute.
#'
#' @param celda.list A celda_list object returned from celda()
#' @param run.param.rows Row indices in the celda.list's run params corresponding to the desired models.
#' @return A celda_list containing celda model objects matching the provided parameters (of class "celda_C", "celda_G", "celda_CG" accordingly), or NA if one is not found.
#' @export
selectModels = function(celda.list, run.param.rows) {
desired.models = lapply(run.param.rows,
function(row.idx) {
if (!is.numeric(row.idx) |
row.idx < 0 |
row.idx > nrow(celda.list$run.params)) {
stop("Invalid row index provided")
}
params = celda.list$run.params[row.idx, ]
getModel(celda.list, params$K, params$L, params$chain)
})
subsetted.celda.list = list(run.params=celda.list$run.params[run.param.rows, ],
res.list=desired.models, content.type=celda.list$content.type,
count.checksum=celda.list$count.checksum)
return(subsetted.celda.list)
}


validateGetModelParams = function(celda.list, K, L, chain, best) {
if (class(celda.list) != "celda_list") stop("First argument to getModel() should be an object of class 'celda_list'")

Expand Down
Empty file modified R/data.R
100644 → 100755
Empty file.
Empty file modified R/diffExp.R
100644 → 100755
Empty file.
Empty file modified R/feature_selection.R
100644 → 100755
Empty file.
Empty file modified R/model_performance.R
100644 → 100755
Empty file.
4 changes: 2 additions & 2 deletions R/plot_dr.R
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ plotDrGrid <- function(dim1, dim2, matrix, size, xlab, ylab, color_low, color_mi
colnames(m) <- c(xlab,ylab,"facet",var_label)
ggplot2::ggplot(m, ggplot2::aes_string(x=xlab, y=ylab)) + ggplot2::geom_point(stat = "identity", size = size, ggplot2::aes_string(color = var_label)) +
ggplot2::facet_wrap(~facet) + ggplot2::theme_bw() + ggplot2::scale_colour_gradient2(low = color_low, high = color_high, mid = color_mid, midpoint = (max(m[,4])-min(m[,4]))/2 ,name = gsub("_"," ",var_label)) +
ggplot2::theme(strip.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.spacing = unit(0,"lines"),
panel.background = element_blank(), axis.line = ggplot2::element_line(colour = "black"))
ggplot2::theme(strip.background = ggplot2::element_blank(), panel.grid.major = ggplot2::element_blank(), panel.grid.minor = ggplot2::element_blank(), panel.spacing = unit(0,"lines"),
panel.background = ggplot2::element_blank(), axis.line = ggplot2::element_line(colour = "black"))
}

#' Create a scatterplot for each row of a normalized gene expression matrix where x and y axis are from a data dimensionality reduction tool.
Expand Down
Empty file modified R/s3_generics.R
100644 → 100755
Empty file.
Empty file modified R/semi_pheatmap.R
100644 → 100755
Empty file.
Empty file modified R/split_clusters.R
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion README.md
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ An analysis example using celda with RNASeq via vignette('celda-analysis')


## New Features and announcements
The v0.2 release of celda represents a useable implementation of the various celda clustering models.
The v0.3 release of celda represents a useable implementation of the various celda clustering models.
Please submit any usability issues or bugs to the issue tracker at https://github.com/compbiomed/celda

You can discuss celda, or ask the developers usage questions, in our [Google Group.](https://groups.google.com/forum/#!forum/celda-list)
1 change: 0 additions & 1 deletion data/pbmc_data.rda.REMOVED.git-id

This file was deleted.

Empty file modified data/sample.cells.rda
100644 → 100755
Empty file.
Empty file modified extdata/test.png
100644 → 100755
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit 7452d26

Please sign in to comment.