itsrainingdata
diff --git a/‎.Rbuildignore
Lines changed: 5 additions & 0 deletions b/‎.Rbuildignore
Lines changed: 5 additions & 0 deletions
diff --git a/‎.travis.yml
Lines changed: 0 additions & 4 deletions b/‎.travis.yml
Lines changed: 0 additions & 4 deletions
diff --git a/‎DESCRIPTION
Lines changed: 6 additions & 3 deletions b/‎DESCRIPTION
Lines changed: 6 additions & 3 deletions
diff --git a/‎NAMESPACE
Lines changed: 3 additions & 0 deletions b/‎NAMESPACE
Lines changed: 3 additions & 0 deletions
diff --git a/‎NEWS.md
Lines changed: 10 additions & 0 deletions b/‎NEWS.md
Lines changed: 10 additions & 0 deletions
diff --git a/‎R/RcppExports.R
Lines changed: 5 additions & 5 deletions b/‎R/RcppExports.R
Lines changed: 5 additions & 5 deletions
diff --git a/‎R/ccdrAlgorithm-functions.R
Lines changed: 52 additions & 0 deletions b/‎R/ccdrAlgorithm-functions.R
Lines changed: 52 additions & 0 deletions
diff --git a/‎R/ccdrAlgorithm-main.R
Lines changed: 59 additions & 5 deletions b/‎R/ccdrAlgorithm-main.R
Lines changed: 59 additions & 5 deletions
diff --git a/‎R/s3-SparseBlockMatrixR.R
Lines changed: 21 additions & 39 deletions b/‎R/s3-SparseBlockMatrixR.R
Lines changed: 21 additions & 39 deletions
@@ -4,3 +4,8 @@
 ^ccdrAlgorithm\.Rproj$
 ^\.Rproj\.user$
 ^\.travis\.yml$
+inst/db
+man-roxygen
+^\.httr-oauth$
+^revdep$
+^codecov\.yml$
@@ -12,7 +12,3 @@ r:
 
 # BiocInstaller required for 'graph' package from Bioconductor
 bioc_required: true
-
-# Install up-to-date dependencies from GitHub
-r_github_packages:
-  - itsrainingdata/sparsebnUtils
@@ -1,8 +1,11 @@
 Package: ccdrAlgorithm
 Title: CCDr Algorithm for Learning Sparse Gaussian Bayesian Networks
-Version: 0.0.1
-Date: 2016-08-08
-Authors@R: person("Bryon", "Aragam", email = "sparsebn@gmail.com", role = c("aut", "cre"))
+Version: 0.0.2
+Date: 2016-11-19
+Authors@R: c(
+    person("Bryon", "Aragam", email = "sparsebn@gmail.com", role = c("aut", "cre")),
+    person("Dacheng", "Zhang", role = c("aut"))
+    )
 Maintainer: Bryon Aragam <sparsebn@gmail.com>
 Description: Implementation of the CCDr (Concave penalized Coordinate Descent with reparametrization) structure learning algorithm as described in Aragam and Zhou (2015) <http://www.jmlr.org/papers/v16/aragam15a.html>. This is a fast, score-based method for learning Bayesian networks that uses sparse regularization and block-cyclic coordinate descent.
 Depends:
 
@@ -1,7 +1,10 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(edgeList,SparseBlockMatrixR)
+S3method(sparse,SparseBlockMatrixR)
 export(ccdr.run)
 importFrom(Rcpp,sourceCpp)
+importFrom(sparsebnUtils,edgeList)
 importFrom(sparsebnUtils,get.adjacency.matrix)
 importFrom(sparsebnUtils,is.zero)
 importFrom(sparsebnUtils,num.edges)
 
@@ -0,0 +1,10 @@
+# ccdrAlgorithm 0.0.2
+
+## Features
+
+* `ccdr.run()` is now compatible with interventional data
+
+# ccdrAlgorithm 0.0.1
+
+* Initial stable release
+
@@ -1,11 +1,11 @@
-# This file was generated by Rcpp::compileAttributes
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
 # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
 
-gridCCDr <- function(cors, init_betas, nn, lambdas, params, verbose) {
-    .Call('ccdrAlgorithm_gridCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nn, lambdas, params, verbose)
+gridCCDr <- function(cors, init_betas, nj, indexj, aj, lambdas, params, verbose) {
+    .Call('ccdrAlgorithm_gridCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nj, indexj, aj, lambdas, params, verbose)
 }
 
-singleCCDr <- function(cors, init_betas, nn, lambda, params, verbose) {
-    .Call('ccdrAlgorithm_singleCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nn, lambda, params, verbose)
+singleCCDr <- function(cors, init_betas, nj, indexj, aj, lambda, params, verbose) {
+    .Call('ccdrAlgorithm_singleCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nj, indexj, aj, lambda, params, verbose)
 }
 
@@ -0,0 +1,52 @@
+## Returns TRUE if 'ivn' is a list whose components are each either a vector
+## or NULL, and FALSE otherwise (including when 'ivn' is not a list at all).
+check_if_ivn_list <- function(ivn) {
+    ## anything that is not a list fails immediately
+    if(!is.list(ivn)) return(FALSE)
+
+    ## check that every component is a vector or NULL
+    ## vapply() keeps the result logical even for an empty list, where sapply()
+    ## would hand back list() and make the logical test below fail
+    component_ok <- vapply(ivn, function(x) is.vector(x) || is.null(x), logical(1))
+    all(component_ok)
+} # END CHECK_IF_IVN_LIST
+
+## Returns TRUE when 'ivn' has exactly one component per row of 'data',
+## i.e. its length equals nn, the number of sample rows.
+check_ivn_size <- function(ivn, data) {
+    n_rows <- nrow(data)
+    n_components <- length(ivn)
+    n_components == n_rows
+} # END CHECK_IVN_SIZE
+
+## Validates a single component of 'ivn', i.e. the labels of the nodes under
+## intervention in one sample.
+##
+##   vec: NULL, or a vector of node labels
+##   pp:  upper bound for valid labels (labels refer to the columns of data)
+##
+## Returns TRUE if 'vec' is NULL or if all of its labels are
+## 1) integer (and not NA), 2) between 1 and pp, and 3) free of duplicates.
+## Any violation raises an error via stop(), so FALSE is never returned.
+check_vector_label <- function(vec, pp) {
+
+    if(is.null(vec)) return(TRUE)
+
+    ## Note: a vector holding only integers and NAs is still of integer type,
+    ## e.g.: c(NA, 1L, NA, 3L, NA, 5L), so NAs have to be rejected explicitly.
+    ## However, c(1L, NA, 3L, 4, NA) is not integer because of the double 4.
+    ## check if labels are integers
+    if(any(is.na(vec)) || !is.integer(vec)) {
+        stop("Non-integer label(s) found in one or more components in ivn.")
+    }
+
+    ## check if labels are in 1..pp
+    ## both sides are already reduced by any(), so the scalar '||' is the right operator
+    if(any(vec < 1) || any(vec > pp)) {
+        stop(sprintf("Labels should all be between 1 and %d to refer to the columns of data.", pp))
+    }
+
+    ## check if labels are unique
+    if(anyDuplicated(vec)) {
+        stop("Duplicated label(s) found in one component in ivn.")
+    }
+
+    TRUE
+} # END CHECK_VECTOR_LABEL
+
+## Validates the labels of every component of 'ivn' by calling
+## check_vector_label() on each one, with pp = ncol(data) as the upper bound
+## (NULL components are accepted; labels must be integer, between 1 and pp,
+## and without duplicates).
+## Returns a logical vector with one entry per component of 'ivn'.
+check_ivn_label <- function(ivn, data) {
+    ## vapply() guarantees a logical result, including logical(0) for an empty 'ivn'
+    vapply(ivn, check_vector_label, logical(1), pp = ncol(data))
+} # END CHECK_IVN_LABEL
+
@@ -92,11 +92,14 @@ ccdr.run <- function(data,
     ### Check data format
     if(!sparsebnUtils::is.sparsebnData(data)) stop(sparsebnUtils::input_not_sparsebnData(data))
 
-    ### Extract the data (CCDr only works on observational data, so ignore the intervention part)
+    ### Extract the data and ivn
+    ### CCDr now works on both observational data and interventional data, and a mixture of both
     data_matrix <- data$data
+    ivn_list <- data$ivn
 
     ### Call the CCDr algorithm
     ccdr_call(data = data_matrix,
+              ivn = ivn_list,
               betas = betas,
               lambdas = lambdas,
               lambdas.length = lambdas.length,
@@ -115,6 +118,7 @@ ccdr.run <- function(data,
 #    this is handled internally by ccdr_gridR and ccdr_singleR.
 #
 ccdr_call <- function(data,
+                      ivn = NULL,
                       betas,
                       lambdas,
                       lambdas.length,
@@ -149,6 +153,20 @@ ccdr_call <- function(data,
     nn <- as.integer(nrow(data))
     pp <- as.integer(ncol(data))
 
+    ### No intervention list supplied: use one NULL component per sample
+    if(is.null(ivn)) ivn <- vector("list", nn) # to pass testthat for observational data cases
+    ### Check ivn
+    if(!check_if_ivn_list(ivn)) stop("ivn must be a list of NULLs or vectors!")
+    if(!check_ivn_size(ivn, data)) stop(sprintf("Length of ivn is %d, expected to match the number of rows in data: %d.", length(ivn), nn))
+    check_ivn_label(ivn, data) # called for its error checks only; the return value is not used
+
+    ### nj[j] is the number of samples in which node j is NOT listed in ivn
+    ### (note the negation below); it is referred to elsewhere as the
+    ### "intervention times vector"
+    ### seq_len() keeps this safe when pp == 0, where 1:pp would iterate over c(1, 0)
+    nj <- vapply(seq_len(pp), function(j){
+        sum(!vapply(ivn, function(targets) any(is.element(targets, j)), logical(1)))
+    }, integer(1))
+
     ### Use default values for lambda if not specified
     if(is.null(lambdas)){
         if(is.null(lambdas.length)){
@@ -188,6 +206,7 @@ ccdr_call <- function(data,
 #     }
 
     ### By default, set the initial guess for betas to be all zeroes
+
     if(missing(betas)){
         betas <- matrix(0, nrow = pp, ncol = pp)
         # betas <- SparseBlockMatrixR(betas) # 2015-03-26: Deprecated and replaced with .init_sbm below
@@ -197,7 +216,6 @@ ccdr_call <- function(data,
         #   Still need to set start = 0, though.
         betas$start <- 0
     } # Type-checking for betas happens in ccdr_singleR
-
     # This parameter can be set by the user, but in order to prevent the algorithm from taking too long to run
     #  it is a good idea to keep the threshold used by default which is O(sqrt(pp))
     if(is.null(max.iters)){
@@ -207,12 +225,16 @@ ccdr_call <- function(data,
     t1.cor <- proc.time()[3]
     #     cors <- cor(data)
     #     cors <- cors[upper.tri(cors, diag = TRUE)]
-    cors <- sparsebnUtils::cor_vector(data)
+    corlist <- sparsebnUtils::cor_vector_ivn(data, ivn)
+    cors <- corlist$cors
+    indexj <- corlist$indexj
     t2.cor <- proc.time()[3]
 
     fit <- ccdr_gridR(cors,
                       as.integer(pp),
                       as.integer(nn),
+                      as.integer(nj),
+                      as.integer(indexj),
                       betas,
                       as.numeric(lambdas),
                       as.numeric(gamma),
@@ -245,6 +267,8 @@ ccdr_call <- function(data,
 #   Main subroutine for running the CCDr algorithm on a grid of lambda values.
 ccdr_gridR <- function(cors,
                        pp, nn,
+                       nj = NULL,
+                       indexj = NULL,
                        betas,
                        lambdas,
                        gamma,
@@ -261,13 +285,21 @@ ccdr_gridR <- function(cors,
     ### nlam is now set automatically
     nlam <- length(lambdas)
 
+    ### Check indexj
+    if(is.null(indexj)) indexj <- rep(0L, pp + 1)
+    ### Check nj
+    if(is.null(nj)) nj <- as.integer(rep(nn, pp))
+
     ccdr.out <- list()
     for(i in 1:nlam){
+
         if(verbose) message("Working on lambda = ", round(lambdas[i], 5), " [", i, "/", nlam, "]")
 
         t1.ccdr <- proc.time()[3]
         ccdr.out[[i]] <- ccdr_singleR(cors,
                                       pp, nn,
+                                      nj,
+                                      indexj,
                                       betas,
                                       lambdas[i],
                                       gamma = gamma,
@@ -304,6 +336,8 @@ ccdr_gridR <- function(cors,
 #    called. Type-checking is strongly enforced here.
 ccdr_singleR <- function(cors,
                          pp, nn,
+                         nj = NULL,
+                         indexj = NULL,
                          betas,
                          lambda,
                          gamma,
@@ -313,9 +347,27 @@ ccdr_singleR <- function(cors,
                          verbose = FALSE
 ){
 
+    ### Check indexj (defaults to a zero vector of length pp + 1 when not supplied)
+    if(is.null(indexj)) indexj <- rep(0L, pp + 1)
+    if(!is.vector(indexj)) stop("Index vector for cors is not a vector.")
+    ### the reported bounds below match the conditions being tested (pp + 1, not pp)
+    if(length(indexj) > pp + 1) stop(sprintf("Index vector for cors is too long, expected length to be no greater than %d.", pp + 1L))
+    if(!is.integer(indexj)) stop("Index vector for cors has non-integer component(s).")
+    if(any(indexj < 0 | indexj > pp + 1)) stop(sprintf("Index vector for cors has out-of-range component(s), expected to be between 0 and %d.", pp + 1L))
+
+    ### Check nj (defaults to nn for every node when not supplied)
+    if(is.null(nj)) nj <- as.integer(rep(nn, pp))
+    if(!is.vector(nj)) stop("Intervention times vector is not a vector.")
+    if(length(nj) != pp) stop(sprintf("Length of intervention times vector is %d, expected %d to match the number of columns of data", length(nj), pp))
+    if(!is.integer(nj)) stop("Intervention times vector has non-integer component(s).")
+    if(any(nj < 0 | nj > nn)) stop(sprintf("Intervention times vector has out-of-range component(s), expected to be between 0 and %d.", nn))
+
+    ### add a weight a_j to penalty on beta_{ij}
+    ### since now with intervention data, beta_{ij} only appears n_j times out of total nn samples
+    aj <- nj / nn
+
     ### Check cors
     if(!is.numeric(cors)) stop("cors must be a numeric vector!")
-    if(length(cors) != pp*(pp+1)/2) stop(paste0("cors has incorrect length: Expected length = ", pp*(pp+1)/2, " input length = ", length(cors)))
+    if(length(cors) != length(unique(indexj))*pp*(pp+1)/2) stop(paste0("cors has incorrect length: Expected length = ", length(unique(indexj))*pp*(pp+1)/2, " input length = ", length(cors)))
 
     ### Check dimension parameters
     if(!is.integer(pp) || !is.integer(nn)) stop("Both pp and nn must be integers!")
@@ -354,7 +406,9 @@ ccdr_singleR <- function(cors,
     t1.ccdr <- proc.time()[3]
     ccdr.out <- singleCCDr(cors,
                            betas,
-                           nn,
+                           nj,
+                           indexj,
+                           aj,
                            lambda,
                            c(gamma, eps, maxIters, alpha),
                            verbose = verbose)
 
@@ -56,6 +56,10 @@ is.SparseBlockMatrixR <- function(x){
     inherits(x, "SparseBlockMatrixR")
 } # END IS.SPARSEBLOCKMATRIXR
 
+#------------------------------------------------------------------------------#
+# as.SparseBlockMatrixR
+#  Convert an object TO SparseBlockMatrixR by passing it to the constructor
+#
+as.SparseBlockMatrixR <- function(x){
+    # NOTE: no per-class methods are needed here; S3 delegation is implicitly
+    #       handled by the constructor
+    sbm <- SparseBlockMatrixR(x)
+    sbm
+} # END AS.SPARSEBLOCKMATRIXR
+
 #------------------------------------------------------------------------------#
 # reIndexC.SparseBlockMatrixR
 #  Re-indexing TO C for SparseBlockMatrixR objects
@@ -201,30 +205,6 @@ SparseBlockMatrixR.matrix <- function(x, sigmas, ...){
     SparseBlockMatrixR(sparsebnUtils::as.sparse(x), sigmas, ...)
 } # END SPARSEBLOCKMATRIXR.MATRIX
 
-#------------------------------------------------------------------------------#
-# as.SparseBlockMatrixR.list
-#  Convert FROM list TO SparseBlockMatrixR
-#
-as.SparseBlockMatrixR.list <- function(x){
-    SparseBlockMatrixR(x)
-} # END AS.SPARSEBLOCKMATRIXR.LIST
-
-#------------------------------------------------------------------------------#
-# as.SparseBlockMatrixR.sparse
-#  Convert FROM sparse TO SparseBlockMatrixR
-#
-as.SparseBlockMatrixR.sparse <- function(x){
-    SparseBlockMatrixR(x)
-} # END AS.SPARSEBLOCKMATRIXR.SPARSE
-
-#------------------------------------------------------------------------------#
-# as.SparseBlockMatrixR.matrix
-#  Convert FROM matrix TO SparseBlockMatrixR
-#
-as.SparseBlockMatrixR.matrix <- function(x){
-    SparseBlockMatrixR(x)
-} # END AS.SPARSEBLOCKMATRIXR.MATRIX
-
 #------------------------------------------------------------------------------#
 # as.list.SparseBlockMatrixR
 #  Convert FROM SparseBlockMatrixR TO list
@@ -261,10 +241,11 @@ as.matrix.SparseBlockMatrixR <- function(x){
 } # END AS.MATRIX.SPARSEBLOCKMATRIXR
 
 #------------------------------------------------------------------------------#
-# as.edgeList.SparseBlockMatrixR
+# edgeList.SparseBlockMatrixR
 # Coerce SBM to edge list
 #
-as.edgeList.SparseBlockMatrixR <- function(x){
+#' @export
+edgeList.SparseBlockMatrixR <- function(x){
     #
     # We have to be careful in obtaining the edge list of a SparseBlockMatrixR object:
     #  It is NOT the same as the rows slot since some of these components may have
@@ -278,12 +259,13 @@ as.edgeList.SparseBlockMatrixR <- function(x){
     el <- mapply(function(x, y){ y[which(abs(x) > sparsebnUtils::zero_threshold())]}, x$vals, x$rows)
 
     sparsebnUtils::edgeList(el)
-} # AS.EDGELIST.SPARSEBLOCKMATRIXR
+} # EDGELIST.SPARSEBLOCKMATRIXR
 
 #------------------------------------------------------------------------------#
 # sparse.SparseBlockMatrixR
 # 2016-01-22: Migrated to this file from s3-sparse.R
 #
+#' @export
 sparse.SparseBlockMatrixR <- function(x, index = "R", ...){
 
     if(index != "R" && index != "C") stop("Invalid entry for index parameter: Must be either 'R' or 'C'!")
@@ -321,16 +303,16 @@ sparse.SparseBlockMatrixR <- function(x, index = "R", ...){
     }
 } # END SPARSE.SPARSEBLOCKMATRIXR
 
-#------------------------------------------------------------------------------#
-# as.sparse.SparseBlockMatrixR
-#  Convert FROM SparseBlockMatrixR TO sparse
-#  By default, return the object using R indexing. If desired, the method can return C-style indexing by setting
-#    index = "C".
-# 2016-01-22: Migrated to this file from s3-sparse.R
-#
-as.sparse.SparseBlockMatrixR <- function(x, index = "R", ...){
-    sparse.SparseBlockMatrixR(x, index)
-} # END AS.SPARSE.SPARSEBLOCKMATRIXR
+# #------------------------------------------------------------------------------#
+# # as.sparse.SparseBlockMatrixR
+# #  Convert FROM SparseBlockMatrixR TO sparse
+# #  By default, return the object using R indexing. If desired, the method can return C-style indexing by setting
+# #    index = "C".
+# # 2016-01-22: Migrated to this file from s3-sparse.R
+# #
+# as.sparse.SparseBlockMatrixR <- function(x, index = "R", ...){
+#     sparse.SparseBlockMatrixR(x, index)
+# } # END AS.SPARSE.SPARSEBLOCKMATRIXR
 
 # to_graphNEL.SparseBlockMatrixR
 #  Convert SBM object to graphNEL object
@@ -347,7 +329,7 @@ to_graphNEL.SparseBlockMatrixR <- function(x){
 } # END TO_GRAPHNEL.SPARSEBLOCKMATRIXR
 
 get.adjacency.matrix.SparseBlockMatrixR <- function(x){
-    sparsebnUtils::get.adjacency.matrix(as.edgeList.SparseBlockMatrixR(x))
+    sparsebnUtils::get.adjacency.matrix(sparsebnUtils::as.edgeList(x))
 } # END GET.ADJACENCY.MATRIX.SPARSEBLOCKMATRIXR
 
 num.nodes.SparseBlockMatrixR <- function(x){
@@ -357,7 +339,7 @@ num.nodes.SparseBlockMatrixR <- function(x){
 
 num.edges.SparseBlockMatrixR <- function(x){
     ### The number of nodes should be exactly the same as the length of the rows list
-    sparsebnUtils::num.edges(as.edgeList.SparseBlockMatrixR(x))
+    sparsebnUtils::num.edges(sparsebnUtils::as.edgeList(x))
 } # END NUM.EDGES.SPARSEBLOCKMATRIXR
 
 # This function is (so far) only used in unit tests
Original file line number	Diff line number	Diff line change
`@@ -1,11 +1,11 @@`
`1`		`-# This file was generated by Rcpp::compileAttributes`
	`1`	`+# Generated by using Rcpp::compileAttributes() -> do not edit by hand`
`2`	`2`	`# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393`
`3`	`3`
`4`		`-gridCCDr <- function(cors, init_betas, nn, lambdas, params, verbose) {`
`5`		`- .Call('ccdrAlgorithm_gridCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nn, lambdas, params, verbose)`
	`4`	`+gridCCDr <- function(cors, init_betas, nj, indexj, aj, lambdas, params, verbose) {`
	`5`	`+ .Call('ccdrAlgorithm_gridCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nj, indexj, aj, lambdas, params, verbose)`
`6`	`6`	`}`
`7`	`7`
`8`		`-singleCCDr <- function(cors, init_betas, nn, lambda, params, verbose) {`
`9`		`- .Call('ccdrAlgorithm_singleCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nn, lambda, params, verbose)`
	`8`	`+singleCCDr <- function(cors, init_betas, nj, indexj, aj, lambda, params, verbose) {`
	`9`	`+ .Call('ccdrAlgorithm_singleCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nj, indexj, aj, lambda, params, verbose)`
`10`	`10`	`}`
`11`	`11`