Commit d8f88f8

Add files via upload
1 parent 1b00f56 commit d8f88f8

76 files changed: +14427 -1579 lines changed

R/RcppExports.R

Lines changed: 79 additions & 160 deletions
@@ -1,160 +1,79 @@
-# Generated by using Rcpp::compileAttributes() -> do not edit by hand
-# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
-
-Rcpp_predict_sparse <- function(A, mask, w, L1, L2, threads, mask_zeros) {
-    .Call(`_RcppML_Rcpp_predict_sparse`, A, mask, w, L1, L2, threads, mask_zeros)
-}
-
-Rcpp_predict_dense <- function(A_, mask, w, L1, L2, threads, mask_zeros) {
-    .Call(`_RcppML_Rcpp_predict_dense`, A_, mask, w, L1, L2, threads, mask_zeros)
-}
-
-Rcpp_mse_sparse <- function(A, mask, w, d, h, threads, mask_zeros) {
-    .Call(`_RcppML_Rcpp_mse_sparse`, A, mask, w, d, h, threads, mask_zeros)
-}
-
-Rcpp_mse_dense <- function(A_, mask, w, d, h, threads, mask_zeros) {
-    .Call(`_RcppML_Rcpp_mse_dense`, A_, mask, w, d, h, threads, mask_zeros)
-}
-
-Rcpp_mse_missing_sparse <- function(A, mask, w, d, h, threads) {
-    .Call(`_RcppML_Rcpp_mse_missing_sparse`, A, mask, w, d, h, threads)
-}
-
-Rcpp_mse_missing_dense <- function(A_, mask, w, d, h, threads) {
-    .Call(`_RcppML_Rcpp_mse_missing_dense`, A_, mask, w, d, h, threads)
-}
-
-Rcpp_nmf_sparse <- function(A, mask, tol, maxit, verbose, L1, L2, threads, w_init, link_matrix_h, mask_zeros, link_h, sort_model) {
-    .Call(`_RcppML_Rcpp_nmf_sparse`, A, mask, tol, maxit, verbose, L1, L2, threads, w_init, link_matrix_h, mask_zeros, link_h, sort_model)
-}
-
-Rcpp_nmf_dense <- function(A_, mask, tol, maxit, verbose, L1, L2, threads, w_init, link_matrix_h, mask_zeros, link_h, sort_model) {
-    .Call(`_RcppML_Rcpp_nmf_dense`, A_, mask, tol, maxit, verbose, L1, L2, threads, w_init, link_matrix_h, mask_zeros, link_h, sort_model)
-}
-
-Rcpp_bipartition_sparse <- function(A, tol, maxit, nonneg, samples, seed, verbose = FALSE, calc_dist = FALSE, diag = TRUE) {
-    .Call(`_RcppML_Rcpp_bipartition_sparse`, A, tol, maxit, nonneg, samples, seed, verbose, calc_dist, diag)
-}
-
-Rcpp_bipartition_dense <- function(A, tol, maxit, nonneg, samples, seed, verbose = FALSE, calc_dist = FALSE, diag = TRUE) {
-    .Call(`_RcppML_Rcpp_bipartition_dense`, A, tol, maxit, nonneg, samples, seed, verbose, calc_dist, diag)
-}
-
-Rcpp_dclust_sparse <- function(A, min_samples, min_dist, verbose, tol, maxit, nonneg, seed, threads) {
-    .Call(`_RcppML_Rcpp_dclust_sparse`, A, min_samples, min_dist, verbose, tol, maxit, nonneg, seed, threads)
-}
-
-#' @title Non-negative least squares
-#'
-#' @description Solves the equation \code{a %*% x = b} for \code{x} subject to \eqn{x \ge 0}.
-#'
-#' @details
-#' This is a very fast implementation of non-negative least squares (NNLS), suitable for very small or very large systems.
-#'
-#' **Algorithm**. Sequential coordinate descent (CD) is at the core of this implementation, and requires an initialization of \eqn{x}. There are two supported methods for initialization of \eqn{x}:
-#' 1. **Zero-filled initialization** when \code{fast_nnls = FALSE} and \code{cd_maxit > 0}. This is generally very efficient for well-conditioned and small systems.
-#' 2. **Approximation with FAST** when \code{fast_nnls = TRUE}. Forward active set tuning (FAST), described below, finds an approximate active set using unconstrained least squares solutions found by Cholesky decomposition and substitution. To use only the FAST approximation, set \code{cd_maxit = 0}.
-#'
-#' \code{a} must be symmetric positive definite if FAST NNLS is used, but this is not checked.
-#'
-#' See our bioRxiv manuscript (references) for benchmarking against Lawson-Hanson NNLS and for a more technical introduction to these methods.
-#'
-#' **Coordinate Descent NNLS**. Least squares by **sequential coordinate descent** is used to ensure the solution returned is exact. This algorithm was
-#' introduced by Franc et al. (2005), and our implementation is a vectorized and optimized rendition of the one found in the NNLM R package by Xihui Lin (2020).
-#'
-#' **FAST NNLS**. Forward active set tuning (FAST) is an exact or near-exact NNLS approximation initialized by an unconstrained
-#' least squares solution. Negative values in this unconstrained solution are set to zero (the "active set"), and all
-#' other values are added to a "feasible set". An unconstrained least squares solution is then solved for the
-#' "feasible set", any negative values in the resulting solution are set to zero, and the process is repeated until
-#' the feasible set solution is strictly positive.
-#'
-#' The FAST algorithm is guaranteed to converge because the
-#' feasible set will either converge or become smaller with each iteration. The result is generally exact or nearly
-#' exact for small well-conditioned systems (< 50 variables) within 2 iterations, and thus sets up coordinate
-#' descent for very rapid convergence. The FAST method is similar to the first phase of the so-called "TNT-NN" algorithm (Myre et al., 2017),
-#' but the latter half of that method relies heavily on heuristics to refine the approximate active set, which we avoid by using
-#' coordinate descent instead.
-#'
-#' @param a symmetric positive definite matrix giving the coefficients of the linear system
-#' @param b matrix giving the right-hand side(s) of the linear system
-#' @param L1 L1/LASSO penalty to be subtracted from \code{b}
-#' @param L2 Ridge penalty to be added to the diagonal of \code{a}
-#' @param PE Pattern Extraction (angular) penalty to be added to the off-diagonal values of \code{a}
-#' @param fast_nnls initialize coordinate descent with a FAST NNLS approximation
-#' @param cd_maxit maximum number of coordinate descent iterations
-#' @param cd_tol stopping criterion: the difference in \eqn{x} across consecutive solutions over the sum of \eqn{x}
-#' @return vector or matrix giving the solution for \code{x}
-#' @export
-#' @author Zach DeBruine
-#' @seealso \code{\link{nmf}}, \code{\link{project}}
-#' @md
-#'
-#' @references
-#'
-#' DeBruine, ZJ, Melcher, K, and Triche, TJ (2021). "High-performance non-negative matrix factorization for large single-cell data." bioRxiv.
-#'
-#' Franc, VC, Hlavac, VC, and Navara, M (2005). "Sequential Coordinate-Wise Algorithm for the Non-negative Least Squares Problem." Proc. Int'l Conf. Computer Analysis of Images and Patterns.
-#'
-#' Lin, X, and Boutros, PC (2020). "Optimization and expansion of non-negative matrix factorization." BMC Bioinformatics.
-#'
-#' Myre, JM, Frahm, E, Lilja, DJ, and Saar, MO (2017). "TNT-NN: A Fast Active Set Method for Solving Large Non-Negative Least Squares Problems." Procedia Computer Science.
-#'
-#' @examples
-#' \dontrun{
-#' # compare solution to base::solve for a random system
-#' X <- matrix(runif(100), 10, 10)
-#' a <- crossprod(X)
-#' b <- crossprod(X, runif(10))
-#' unconstrained_soln <- solve(a, b)
-#' nonneg_soln <- nnls(a, b)
-#' unconstrained_err <- mean((a %*% unconstrained_soln - b)^2)
-#' nonnegative_err <- mean((a %*% nonneg_soln - b)^2)
-#' unconstrained_err
-#' nonnegative_err
-#' all.equal(solve(a, b), nnls(a, b))
-#'
-#' # example adapted from multiway::fnnls example 1
-#' X <- matrix(1:100, 50, 2)
-#' y <- matrix(101:150, 50, 1)
-#' beta <- solve(crossprod(X)) %*% crossprod(X, y)
-#' beta
-#' beta <- nnls(crossprod(X), crossprod(X, y))
-#' beta
-#' }
-nnls <- function(a, b, cd_maxit = 100L, cd_tol = 1e-8, fast_nnls = FALSE, L1 = 0, L2 = 0, PE = 0) {
-    .Call(`_RcppML_nnls`, a, b, cd_maxit, cd_tol, fast_nnls, L1, L2, PE)
-}
-
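The removed documentation above walks through both NNLS phases in prose. A minimal R sketch of the two, for orientation only: the shipped routine is the C++ backend registered as `_RcppML_nnls`, and the helper names, tolerance handling, and loop structure below are illustrative simplifications, not the package implementation.

# FAST: solve the unconstrained system on a shrinking feasible set,
# zeroing negative coefficients, until the feasible-set solution is positive
fast_init <- function(a, b) {
  x <- rep(0, length(b))
  feasible <- seq_along(b)
  repeat {
    xf <- solve(a[feasible, feasible, drop = FALSE], b[feasible])
    if (all(xf > 0)) break
    feasible <- feasible[xf > 0]
    if (length(feasible) == 0) return(x)
  }
  x[feasible] <- xf
  x
}

# sequential coordinate descent: update one coefficient at a time,
# clamping at zero, until consecutive solutions stop changing
cd_nnls <- function(a, b, x = rep(0, length(b)), cd_maxit = 100, cd_tol = 1e-8) {
  for (iter in seq_len(cd_maxit)) {
    x_prev <- x
    for (i in seq_along(x)) {
      x[i] <- max(0, x[i] + (b[i] - sum(a[i, ] * x)) / a[i, i])
    }
    if (sum(abs(x - x_prev)) / (sum(x) + 1e-15) < cd_tol) break
  }
  x
}

# usage: FAST approximation followed by coordinate descent refinement
X <- matrix(runif(100), 10, 10)
a <- crossprod(X)
b <- as.vector(crossprod(X, runif(10)))
x <- cd_nnls(a, b, x = fast_init(a, b))
min(x)  # all coefficients are non-negative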
-c_rmatrix <- function(nrow, ncol, rng) {
-    .Call(`_RcppML_c_rmatrix`, nrow, ncol, rng)
-}
-
-c_rtimatrix <- function(nrow, ncol, rng) {
-    .Call(`_RcppML_c_rtimatrix`, nrow, ncol, rng)
-}
-
-c_runif <- function(n, min, max, rng, rng2) {
-    .Call(`_RcppML_c_runif`, n, min, max, rng, rng2)
-}
-
-c_rbinom <- function(n, size, inv_probability, rng, rng2) {
-    .Call(`_RcppML_c_rbinom`, n, size, inv_probability, rng, rng2)
-}
-
-c_sample <- function(n, size, replace, rng, rng2) {
-    .Call(`_RcppML_c_sample`, n, size, replace, rng, rng2)
-}
-
-c_rtisparsematrix <- function(nrow, ncol, inv_probability, pattern_only, rng) {
-    .Call(`_RcppML_c_rtisparsematrix`, nrow, ncol, inv_probability, pattern_only, rng)
-}
-
-c_rsparsematrix <- function(nrow, ncol, inv_probability, pattern_only, rng) {
-    .Call(`_RcppML_c_rsparsematrix`, nrow, ncol, inv_probability, pattern_only, rng)
-}
-
-Rcpp_bipartite_match <- function(x) {
-    .Call(`_RcppML_Rcpp_bipartite_match`, x)
-}
-
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#' Benchmark structure performance
+#'
+#' Measure runtime for computing column sums and sparse-dense matrix multiplication
+#'
+#' @param x sparse matrix object of class \code{dgCMatrix} of dimensions \code{m x n} with integral non-zero values
+#' @param y dense matrix object of class \code{matrix} of dimensions \code{k x n}
+#' @param n_reps number of replicate timings to run
+#' @return creates \code{bench_colsums} and \code{bench_tcrossprod} in the global environment. Use the \code{RcppClock} plot method to visualize results, or coerce them to a data.frame.
+#' @export
+#' @seealso \code{\link{tcrossprod}}
+#' @examples
+#' library(Matrix)
+#' x <- rsparsematrix(50, 100, density = 0.5,
+#'   rand.x = function(n) { sample(1:10, n, replace = TRUE) })
+#' y <- matrix(runif(10 * 100), 10, 100)
+#' VSE::benchmark(x, y)
+#' plot(bench_tcrossprod)
+#' plot(bench_colsums)
+benchmark <- function(x, y, n_reps = 100L) {
+    invisible(.Call(`_VSE_benchmark`, x, y, n_reps))
+}
+
+#' Compute column sums of sparse-encoded data
+#'
+#' @param A object of class \code{dgCMatrix}
+#' @param encoding sparse encoding to be used
+#' @export
+#' @return vector of column sums
+#' @examples
+#' library(Matrix)
+#' A <- rsparsematrix(100, 1000, density = 0.5,
+#'   rand.x = function(n) { sample(1:10, n, replace = TRUE) })
+#' v <- colsums(A, encoding = "TRCSC_NP")
+#' plot(v, Matrix::colSums(A))
+colsums <- function(A, encoding = "CSC") {
+    .Call(`_VSE_colsums`, A, encoding)
+}
+
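For orientation, the column-sum kernel that colsums() runs in C++ can be written against the standard dgCMatrix CSC slots in a few lines of plain R. This is a reference sketch of the CSC case only; the alternative encodings such as "TRCSC_NP" are handled by the C++ backend and are not modeled here.

library(Matrix)
# column j of a CSC matrix owns the value run A@x[(A@p[j] + 1):(A@p[j + 1])]
csc_colsums <- function(A) {
  sums <- numeric(ncol(A))
  for (j in seq_len(ncol(A))) {
    idx <- seq.int(A@p[j] + 1, length.out = A@p[j + 1] - A@p[j])
    sums[j] <- sum(A@x[idx])
  }
  sums
}

A <- rsparsematrix(100, 1000, density = 0.5,
                   rand.x = function(n) sample(1:10, n, replace = TRUE))
all.equal(csc_colsums(A), unname(Matrix::colSums(A)))  # TRUE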
+c_inspect <- function(A, encoding = "CSC") {
+    .Call(`_VSE_c_inspect`, A, encoding)
+}
+
+#' Get memory used by C++ object
+#'
+#' Calculates the size of the vector container, all sub-containers, and all index and value types within the container. For this calculation, all values and indices are stored as \code{short int}.
+#'
+#' @param A object of class \code{dgCMatrix}
+#' @param encoding sparse encoding to be used
+#' @export
+#' @return size of the object in bytes
+#' @examples
+#' library(Matrix)
+#' A <- rsparsematrix(100, 1000, density = 0.5,
+#'   rand.x = function(n) { sample(1:10, n, replace = TRUE) })
+#' memuse(A, "TRCSC_NP")
+memuse <- function(A, encoding = "CSC") {
+    .Call(`_VSE_memuse`, A, encoding)
+}
+
+#' Cross-product of transpose
+#'
+#' @param x sparse matrix object of class \code{dgCMatrix} of dimensions \code{m x n} with integral non-zero values
+#' @param y dense matrix object of class \code{matrix} of dimensions \code{k x n}
+#' @param encoding sparse encoding to be used
+#' @export
+#' @return dense matrix giving the transpose cross-product
+#' @examples
+#' library(Matrix)
+#' x <- rsparsematrix(100, 1000, density = 0.5,
+#'   rand.x = function(n) { sample(1:10, n, replace = TRUE) })
+#' y <- matrix(runif(10 * 1000), 10, 1000)
+#' plot(VSE::tcrossprod(x, y), t(Matrix::tcrossprod(x, y)))
+tcrossprod <- function(x, y, encoding = "CSC") {
+    .Call(`_VSE_tcrossprod`, x, y, encoding)
+}
+
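Judging from the example above, VSE::tcrossprod(x, y) agrees with t(Matrix::tcrossprod(x, y)), i.e. y %*% t(x). A plain-R reference over the CSC slots of x follows; the actual C++ loop ordering over the chosen encoding is an assumption on my part.

library(Matrix)
csc_tcrossprod <- function(x, y) {
  res <- matrix(0, nrow(y), nrow(x))
  for (j in seq_len(ncol(x))) {
    # nonzeros of column j of x sit at rows x@i[k] + 1 with values x@x[k]
    for (k in seq.int(x@p[j] + 1, length.out = x@p[j + 1] - x@p[j])) {
      res[, x@i[k] + 1] <- res[, x@i[k] + 1] + x@x[k] * y[, j]
    }
  }
  res
}

x <- rsparsematrix(100, 1000, density = 0.5,
                   rand.x = function(n) sample(1:10, n, replace = TRUE))
y <- matrix(runif(10 * 1000), 10, 1000)
all.equal(csc_tcrossprod(x, y), y %*% t(as.matrix(x)))  # TRUE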

R/VSE.R

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+#' VSE: Vectorized Sparse Encoding
+#'
+#' @description
+#' Proof-of-concept implementation of vectorized sparse
+#' encodings: scalable, compressed structures for sparse data
+#'
+#' @import knitr Matrix RcppEigen RcppClock
+#' @importFrom Rcpp evalCpp
+#' @useDynLib VSE, .registration = TRUE
+#' @docType package
+#' @name VSE
+#' @author Zach DeBruine
+#' @aliases VSE-package
+#' @md
+#'
+NULL

R/inspect.R

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+#' View internal structure of sparse encodings
+#'
+#' @param A object of class \code{dgCMatrix}
+#' @param encoding sparse encoding to be used
+#' @importFrom utils str
+#' @export
+#' @return debug-level output printed to the console
+#' @examples
+#' library(Matrix)
+#' A <- rsparsematrix(20, 5, density = 0.2,
+#'   rand.x = function(n) { sample(1:5, n, replace = TRUE) })
+#' inspect(A, "TRCSC_NP")
+inspect <- function(A, encoding = "TRCSC_NP") {
+    str(c_inspect(A, encoding)[-1])
+}

R/redundancy.R

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
+#' Calculate redundancy of data per column
+#'
+#' @param A sparse matrix of class \code{dgCMatrix}
+#' @export
+#' @return fractional redundancy by column
+#'
+calc_redundancy <- function(A) {
+    redundancy <- c()
+    for (i in 1:ncol(A)) {
+        redundancy <- c(redundancy, length(unique(A[, i])) / (A@p[[i + 1]] - A@p[[i]]))
+    }
+    1 - redundancy
+}
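A quick usage sketch for calc_redundancy; the seed and matrix parameters are mine, for a reproducible illustration. Each column scores 1 minus the number of unique values seen in that column over its nonzero count, so heavy value repetition pushes the score toward 1. Note that unique() sees the full column, zeros included.

library(Matrix)
set.seed(1)
A <- rsparsematrix(100, 20, density = 0.5,
                   rand.x = function(n) sample(1:3, n, replace = TRUE))
# ~50 nonzeros per column drawn from {1, 2, 3}, plus zeros: about 4 unique
# values per column, so scores land near 1 - 4/50
round(calc_redundancy(A), 2)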
