itsrainingdata
diff --git a/‎DESCRIPTION
Lines changed: 3 additions & 3 deletions b/‎DESCRIPTION
Lines changed: 3 additions & 3 deletions
diff --git a/‎NEWS.md
Lines changed: 11 additions & 0 deletions b/‎NEWS.md
Lines changed: 11 additions & 0 deletions
diff --git a/‎R/RcppExports.R
Lines changed: 4 additions & 4 deletions b/‎R/RcppExports.R
Lines changed: 4 additions & 4 deletions
diff --git a/‎R/ccdrAlgorithm-bwlist.R
Lines changed: 71 additions & 0 deletions b/‎R/ccdrAlgorithm-bwlist.R
Lines changed: 71 additions & 0 deletions
diff --git a/‎R/ccdrAlgorithm-main.R
Lines changed: 100 additions & 16 deletions b/‎R/ccdrAlgorithm-main.R
Lines changed: 100 additions & 16 deletions
@@ -1,7 +1,7 @@
 Package: ccdrAlgorithm
 Title: CCDr Algorithm for Learning Sparse Gaussian Bayesian Networks
-Version: 0.0.3
-Date: 2017-03-09
+Version: 0.0.4
+Date: 2017-09-09
 Authors@R: c(
     person("Bryon", "Aragam", email = "sparsebn@gmail.com", role = c("aut", "cre")),
     person("Dacheng", "Zhang", role = c("aut"))
@@ -11,7 +11,7 @@ Description: Implementation of the CCDr (Concave penalized Coordinate Descent wi
 Depends:
     R (>= 3.2.3)
 Imports:
-    sparsebnUtils (>= 0.0.4),
+    sparsebnUtils (>= 0.0.5),
     Rcpp (>= 0.11.0),
     stats,
     utils
 
@@ -1,3 +1,14 @@
+# ccdrAlgorithm 0.0.4
+
+## Features
+
+* `ccdr.run` now supports black lists and white lists (sparsebn #6)
+* Users can now use prior knowledge of variances via the new `sigmas` argument to `ccdr.run` (sparsebn #12)
+
+## Bug fixes
+
+* Fixed a bug where `ivn.rand = FALSE` was not handled correctly (#4)
+
 # ccdrAlgorithm 0.0.3
 
 ## Features
 
@@ -1,11 +1,11 @@
 # Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-gridCCDr <- function(cors, init_betas, nj, indexj, aj, lambdas, params, verbose) {
-    .Call('ccdrAlgorithm_gridCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nj, indexj, aj, lambdas, params, verbose)
+gridCCDr <- function(cors, init_betas, init_sigmas, nj, indexj, aj, lambdas, weights, params, verbose) {
+    .Call(`_ccdrAlgorithm_gridCCDr`, cors, init_betas, init_sigmas, nj, indexj, aj, lambdas, weights, params, verbose)
 }
 
-singleCCDr <- function(cors, init_betas, nj, indexj, aj, lambda, params, verbose) {
-    .Call('ccdrAlgorithm_singleCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nj, indexj, aj, lambda, params, verbose)
+singleCCDr <- function(cors, init_betas, init_sigmas, nj, indexj, aj, lambda, weights, params, verbose) {
+    .Call(`_ccdrAlgorithm_singleCCDr`, cors, init_betas, init_sigmas, nj, indexj, aj, lambda, weights, params, verbose)
 }
 
@@ -0,0 +1,71 @@
+#
+#  ccdrAlgorithm-bwlist.R
+#  ccdrAlgorithm
+#
+#  Created by Bryon Aragam (local) on 8/11/17.
+#  Copyright (c) 2014-2017 Bryon Aragam. All rights reserved.
+#
+
+#
+# PACKAGE CCDRALGORITHM: Helper methods for black/white lists
+#
+#   CONTENTS:
+#     names_to_indices
+#     rows_to_list
+#     bwlist_check
+#     bwlist_to_weights
+#
+
+### Look up the position of each entry of `v` within `names`
+#     Thin, descriptively-named alias for match(): the result has the same
+#     length as `v`, and entries of `v` that do not occur in `names` are NA.
+names_to_indices <- function(v, names){
+    match(x = v, table = names)
+} # END NAMES_TO_INDICES
+
+### Returns a list whose components are the rows of a matrix
+#     m: a matrix (it may have zero rows)
+#     Returns a list of length nrow(m) whose j-th component is the vector m[j,];
+#     a matrix with no rows gives an empty list.
+rows_to_list <- function(m){
+    # seq_len() instead of 1:nrow(m): the latter counts down (1, 0) when m has
+    # no rows and then fails with an out-of-bounds subscript
+    lapply(seq_len(nrow(m)), function(j) m[j,])
+} # END ROWS_TO_LIST
+
+### Check correctness of input and transform from matrix to list of indices
+#     bwlist: two-column matrix of edges, one edge per row, given either as
+#             node names (character matrix) or as node indices
+#     names:  node names used to translate a character matrix into indices
+#
+#     Returns a list with one integer vector of length 2 per row of bwlist.
+#     Stops if bwlist is not a two-column matrix, contains missing values, or
+#     refers to a node name that does not occur in `names`.
+bwlist_check <- function(bwlist, names){
+    ## Consistency checks
+    if(!is.matrix(bwlist) || ncol(bwlist) != 2){
+        stop("Input must be a matrix with exactly 2 columns!")
+    }
+
+    if(anyNA(bwlist)){
+        stop("Input cannot have missing values!")
+    }
+
+    ### Convert characters names to indices
+    if(is.character(bwlist)){
+        edge_names <- as.vector(bwlist)
+        edge_indices <- names_to_indices(edge_names, names)
+
+        ## Names that are not found come back as NA *after* the missing-value
+        ## check above, so they must be caught here or the edge is lost silently
+        unknown <- unique(edge_names[is.na(edge_indices)])
+        if(length(unknown) > 0){
+            stop(sprintf("Unknown node name(s) in input: %s", paste(unknown, collapse = ", ")))
+        }
+
+        # as.vector() flattened column by column, so refilling with ncol = 2 restores the layout
+        bwlist <- matrix(edge_indices, ncol = 2)
+    }
+
+    storage.mode(bwlist) <- "integer" # This is important in ccdr_call to check overlap between blacklist and whitelist, fails if numerics are mixed with ints
+    rows_to_list(bwlist)
+} # END BWLIST_CHECK
+
+### Convert b/w lists to weight matrix of {-1,0,1}
+#     -1 = black listed (guaranteed to be absent / zero)
+#      0 = white listed (guaranteed to be present / nonzero)
+#      1 = gray listed (may or may not be in the final model)
+#
+#     black, white: NULL, or a list with one length-2 vector of node indices
+#                   per edge
+#     nnode:        number of nodes; the result is an nnode x nnode integer matrix
+#
+#     The list entry c(i, j) sets entry [i, j] of the result. Stops if an entry
+#     is not a pair of indices between 1 and nnode.
+bwlist_to_weights <- function(black, white, nnode){
+    weights <- matrix(1L, ncol = nnode, nrow = nnode)
+
+    ### Reject anything that is not a pair of valid indices: zero or negative
+    ### subscripts would otherwise silently skip the edge or overwrite other entries
+    check_edge <- function(edge, listname){
+        if(!is.numeric(edge) || length(edge) != 2 || anyNA(edge) || any(edge < 1 | edge > nnode)){
+            stop(sprintf("Invalid edge [%s] in %s: expected two node indices between 1 and %d!", paste(edge, collapse = ","), listname, nnode))
+        }
+    }
+
+    ### Looping over the elements themselves does nothing for NULL and for
+    ### empty lists, unlike 1:length(x) which would visit indices 1 and 0
+    for(edge in white){
+        check_edge(edge, "whitelist")
+        weights[edge[1], edge[2]] <- 0L
+    }
+
+    ### The black list is applied last, so it wins if an edge is in both lists
+    for(edge in black){
+        check_edge(edge, "blacklist")
+        weights[edge[1], edge[2]] <- -1L
+    }
+
+    weights
+} # END BWLIST_TO_WEIGHTS
@@ -35,23 +35,36 @@ NULL
 #' This implementation includes two options for the penalty: (1) MCP, and (2) L1 (or Lasso). This option
 #' is controlled by the \code{gamma} argument.
 #'
-#' @param data Data as \code{\link[sparsebnUtils]{sparsebnData}}. Must be numeric and contain no missing values.
-#' @param betas Initial guess for the algorithm. Represents the weighted adjacency matrix
-#'              of a DAG where the algorithm will begin searching for an optimal structure.
-#' @param lambdas (optional) Numeric vector containing a grid of lambda values (i.e. regularization
+#' @param data Data as a \code{\link[sparsebnUtils]{sparsebnData}} object. Must be numeric and contain no missing values.
+#' @param lambdas Numeric vector containing a grid of lambda values (i.e. regularization
 #'                parameters) to use in the solution path. If missing, a default grid of values will be
 #'                used based on a decreasing log-scale (see also \link{generate.lambdas}).
 #' @param lambdas.length Integer number of values to include in the solution path. If \code{lambdas}
 #'                       has also been specified, this value will be ignored. Note also that the final
 #'                       solution path may contain fewer estimates (see
 #'                       \code{alpha}).
+#' @param whitelist A two-column matrix of edges that are guaranteed to be in each
+#'                  estimate (a "white list"). Each row in this matrix corresponds
+#'                  to an edge that is to be whitelisted. These edges can be
+#'                  specified by node name (as a \code{character} matrix), or by
+#'                  index (as a \code{numeric} matrix).
+#' @param blacklist A two-column matrix of edges that are guaranteed to be absent
+#'                  from each estimate (a "black list"). See argument
+#'                  "\code{whitelist}" above for more details.
 #' @param gamma Value of concavity parameter. If \code{gamma > 0}, then the MCP will be used
 #'              with \code{gamma} as the concavity parameter. If \code{gamma < 0}, then the L1 penalty
 #'              will be used and this value is otherwise ignored.
 #' @param error.tol Error tolerance for the algorithm, used to test for convergence.
 #' @param max.iters Maximum number of iterations for each internal sweep.
 #' @param alpha Threshold parameter used to terminate the algorithm whenever the number of edges in the
 #'              current DAG estimate is \code{> alpha * ncol(data)}.
+#' @param betas Initial guess for the algorithm. Represents the weighted adjacency matrix
+#'              of a DAG where the algorithm will begin searching for an optimal structure.
+#' @param sigmas Numeric vector of known values of conditional variances for each node in the network. If this is
+#'               set by the user, these parameters will not be computed and the input will
+#'               be used as the "true" values of the variances in the algorithm. Note that setting
+#'               this to be all ones (i.e. \code{sigmas[j] = 1} for all \code{j}) is
+#'               equivalent to using the least-squares loss.
 #' @param verbose \code{TRUE / FALSE} whether or not to print out progress and summary reports.
 #'
 #' @return A \code{\link[sparsebnUtils]{sparsebnPath}} object.
@@ -77,13 +90,16 @@ NULL
 #'
 #' @export
 ccdr.run <- function(data,
-                     betas,
                      lambdas = NULL,
                      lambdas.length = NULL,
+                     whitelist = NULL,
+                     blacklist = NULL,
                      gamma = 2.0,
                      error.tol = 1e-4,
                      max.iters = NULL,
                      alpha = 10,
+                     betas,
+                     sigmas = NULL,
                      verbose = FALSE
 ){
     ### Check data format
@@ -94,12 +110,24 @@ ccdr.run <- function(data,
     data_matrix <- data$data
     ivn_list <- data$ivn
 
+    ### If ivn_list contains character names, convert to indices
+    if("character" %in% sparsebnUtils::list_classes(ivn_list)){
+        ivn_list <- lapply(ivn_list, function(x){
+            idx <- match(x, names(data_matrix))
+            if(length(idx) == 0) NULL # return NULL if no match (=> observational)
+            else idx
+        })
+    }
+
     ### Call the CCDr algorithm
     ccdr_call(data = data_matrix,
               ivn = ivn_list,
               betas = betas,
+              sigmas = sigmas,
               lambdas = lambdas,
               lambdas.length = lambdas.length,
+              whitelist = whitelist,
+              blacklist = blacklist,
               gamma = gamma,
               error.tol = error.tol,
               rlam = NULL,
@@ -120,15 +148,19 @@ MAX_CCS_ARRAY_SIZE <- function() 10000
 ccdr_call <- function(data,
                       ivn = NULL,
                       betas,
+                      sigmas,
                       lambdas,
                       lambdas.length,
+                      whitelist = NULL,
+                      blacklist = NULL,
                       gamma,
                       error.tol,
                       rlam,
                       max.iters,
                       alpha,
                       verbose = FALSE
 ){
+    node_names <- names(data)
 #     ### Allow users to input a data.frame, but kindly warn them about doing this
 #     if(is.data.frame(data)){
 #         warning(sparsebnUtils::alg_input_data_frame())
@@ -171,6 +203,11 @@ ccdr_call <- function(data,
         nj[j] <- sum(!sapply(lapply(ivn, is.element, j), any)) ## optimize for sorted column?
     }
 
+    ### Set default for sigmas (negative values => ignore initial value and update as usual)
+    if(is.null(sigmas)){
+        sigmas <- rep(-1., pp)
+    }
+
     ### Use default values for lambda if not specified
     if(is.null(lambdas)){
         if(is.null(lambdas.length)){
@@ -227,6 +264,23 @@ ccdr_call <- function(data,
         max.iters <- sparsebnUtils::default_max_iters(pp)
     }
 
+    ### White/black lists
+    # Be careful about handling various NULL cases
+    if(!is.null(whitelist)) whitelist <- bwlist_check(whitelist, node_names)
+    if(!is.null(blacklist)) blacklist <- bwlist_check(blacklist, node_names)
+
+    if(!is.null(whitelist) && !is.null(blacklist)){
+        if(length(intersect(whitelist, blacklist)) > 0){
+            badinput <- vapply(intersect(whitelist, blacklist), function(x) sprintf("\t[%s]\n", paste(x, collapse = ",")), FUN.VALUE = "vector")
+            badinput <- paste(badinput, collapse = "")
+            msg <- sprintf("Duplicate entries found in blacklist and whitelist: \n%s", badinput)
+            stop(msg)
+        }
+    }
+
+    weights <- bwlist_to_weights(blacklist, whitelist, nnode = pp)
+
+    ### Pre-process correlation data
     t1.cor <- proc.time()[3]
     #     cors <- cor(data)
     #     cors <- cors[upper.tri(cors, diag = TRUE)]
@@ -241,7 +295,9 @@ ccdr_call <- function(data,
                       as.integer(nj),
                       as.integer(indexj),
                       betas,
+                      as.numeric(sigmas),
                       as.numeric(lambdas),
+                      as.integer(weights),
                       as.numeric(gamma),
                       as.numeric(error.tol),
                       as.integer(max.iters),
@@ -260,7 +316,7 @@ ccdr_call <- function(data,
         names(fit[[k]]$edges) <- names(data)
 
         ### Add node names to output
-        fit[[k]] <- append(fit[[k]], list(names(data)), after = 1) # insert node names into second slot
+        fit[[k]] <- append(fit[[k]], list(node_names), after = 1) # insert node names into second slot
         names(fit[[k]])[2] <- "nodes"
     }
 
@@ -276,7 +332,9 @@ ccdr_gridR <- function(cors,
                        nj = NULL,
                        indexj = NULL,
                        betas,
+                       sigmas,
                        lambdas,
+                       weights,
                        gamma,
                        eps,
                        maxIters,
@@ -307,7 +365,9 @@ ccdr_gridR <- function(cors,
                                       nj,
                                       indexj,
                                       betas,
+                                      sigmas,
                                       lambdas[i],
+                                      weights,
                                       gamma = gamma,
                                       eps = eps,
                                       maxIters = maxIters,
@@ -345,39 +405,46 @@ ccdr_singleR <- function(cors,
                          nj = NULL,
                          indexj = NULL,
                          betas,
+                         sigmas,
                          lambda,
+                         weights,
                          gamma,
                          eps,
                          maxIters,
                          alpha,     # 2-9-15: No longer necessary in ccdr_singleR, but needed since the C++ call asks for it
                          verbose = FALSE
 ){
 
-    if(is.null(indexj)) indexj <- rep(0L, pp + 1)
+    ### Check dimension parameters
+    if(!is.integer(pp) || !is.integer(nn)) stop("Both pp and nn must be integers!")
+    if(pp <= 0 || nn <= 0) stop("Both pp and nn must be positive!")
+
+    ### These variables, if NULL, need to be initialized before checking anything
+    if(is.null(indexj)) indexj <- rep(0L, pp + 1) # initialize indexj
+    if(is.null(nj)) nj <- as.integer(rep(nn, pp)) # initialize nj
+
     ### Check indexj
     if(!is.vector(indexj)) stop("Index vector for cors is not a vector.")
     if(length(indexj) > pp + 1) stop(sprintf("Index vector for cors is too long, expected to be no greater than %d, the number of columns of data.", pp))
     if(!is.integer(indexj)) stop("Index vector for cors has non-integer component(s).")
+    if(any(is.na(indexj) | is.null(indexj))) stop("Index vector cannot have missing or NULL values.")
     if(any(indexj < 0 | indexj > pp + 1)) stop(sprintf("Index vector for cors has out-of-range component(s), expected to be between 0 and %d.", pp))
 
-    if(is.null(nj)) nj <- as.integer(rep(nn, pp))
     ### Check nj
     if(!is.vector(nj)) stop("Intervention times vector is not a vector.")
-    if(length(nj) != pp) stop(sprintf("Length of intervention times vector is %d, expected %d% to match the number of columns of data", length(nj), pp))
+    if(length(nj) != pp) stop(sprintf("Length of intervention times vector is %d, expected to match the number of columns of data = %d", length(nj), pp))
     if(!is.integer(nj)) stop("Intervention times vector has non-integer component(s).")
+    if(any(is.na(nj) | is.null(nj))) stop("Intervention times vector cannot have missing or NULL values.")
     if(any(nj < 0 | nj > nn)) stop(sprintf("Intervention times vector has out-of-range component(s), expected to be between 0 and %d.", nn))
 
-    ### add a weight a_j to penalty on beta_{ij}
-    ### since now with intervention data, beta_{ij} only appears n_j times out of total nn samples
-    aj <- nj / nn
-
     ### Check cors
+    ### This check must come after the checks for indexj, nj since these values are used to check cors
     if(!is.numeric(cors)) stop("cors must be a numeric vector!")
     if(length(cors) != length(unique(indexj))*pp*(pp+1)/2) stop(paste0("cors has incorrect length: Expected length = ", length(unique(indexj))*pp*(pp+1)/2, " input length = ", length(cors)))
 
-    ### Check dimension parameters
-    if(!is.integer(pp) || !is.integer(nn)) stop("Both pp and nn must be integers!")
-    if(pp <= 0 || nn <= 0) stop("Both pp and nn must be positive!")
+    ### add a weight a_j to penalty on beta_{ij}
+    ### since now with intervention data, beta_{ij} only appears n_j times out of total nn samples
+    aj <- nj / nn
 
     ### Check betas
     if(sparsebnUtils::check_if_matrix(betas)){ # if the input is a matrix, convert to SBM object
@@ -387,10 +454,25 @@ ccdr_singleR <- function(cors,
         stop("Incompatible data passed for betas parameter: Should be either matrix or list in SparseBlockMatrixR format.")
     }
 
+    ### Check sigmas
+    if(!is.numeric(sigmas)) stop("sigmas must be numeric!")
+    if(length(sigmas) != pp) stop(sprintf("sigmas must have length = %d!", pp))
+    if(any(sigmas < 0)){
+        # -1 is a sentinel value for updating sigmas via the CD updates
+        if(any(sigmas != -1.)){
+            stop("sigmas must be > 0!")
+        }
+    }
+
     ### Check lambda
     if(!is.numeric(lambda)) stop("lambda must be numeric!")
     if(lambda < 0) stop("lambda must be >= 0!")
 
+    ### Check weights
+    ### weights holds pp*pp entries, so the bounds test must be elementwise (|) and
+    ### reduced with any(): the scalar operator || only looks at the first element
+    ### in older versions of R and is an error for longer vectors since R 4.3
+    if(length(weights) != pp*pp) stop(sprintf("weights must have length p^2 = %d!", pp*pp))
+    if(!is.numeric(weights)) stop("weights must be numeric!")
+    if(anyNA(weights)) stop("weights cannot have missing values!")
+    if(any(weights < -1 | weights > 1)) stop("weights out of bounds!")
+
     ### Check gamma
     if(!is.numeric(gamma)) stop("gamma must be numeric!")
     if(gamma < 0 && gamma != -1) stop("gamma must be >= 0 (MCP) or = -1 (Lasso)!")
@@ -412,10 +494,12 @@ ccdr_singleR <- function(cors,
     t1.ccdr <- proc.time()[3]
     ccdr.out <- singleCCDr(cors,
                            betas,
+                           sigmas,
                            nj,
                            indexj,
                            aj,
                            lambda,
+                           weights,
                            c(gamma, eps, maxIters, alpha),
                            verbose = verbose)
     t2.ccdr <- proc.time()[3]
Original file line number	Diff line number	Diff line change
`@@ -1,11 +1,11 @@`
`1`	`1`	`# Generated by using Rcpp::compileAttributes() -> do not edit by hand`
`2`	`2`	`# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393`
`3`	`3`
`4`		`-gridCCDr <- function(cors, init_betas, nj, indexj, aj, lambdas, params, verbose) {`
`5`		`- .Call('ccdrAlgorithm_gridCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nj, indexj, aj, lambdas, params, verbose)`
	`4`	`+gridCCDr <- function(cors, init_betas, init_sigmas, nj, indexj, aj, lambdas, weights, params, verbose) {`
	`5`	+ .Call(`_ccdrAlgorithm_gridCCDr`, cors, init_betas, init_sigmas, nj, indexj, aj, lambdas, weights, params, verbose)
`6`	`6`	`}`
`7`	`7`
`8`		`-singleCCDr <- function(cors, init_betas, nj, indexj, aj, lambda, params, verbose) {`
`9`		`- .Call('ccdrAlgorithm_singleCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nj, indexj, aj, lambda, params, verbose)`
	`8`	`+singleCCDr <- function(cors, init_betas, init_sigmas, nj, indexj, aj, lambda, weights, params, verbose) {`
	`9`	+ .Call(`_ccdrAlgorithm_singleCCDr`, cors, init_betas, init_sigmas, nj, indexj, aj, lambda, weights, params, verbose)
`10`	`10`	`}`
`11`	`11`