Skip to content

Commit 20c71fd

Browse files
Merge branch 'dev'
2 parents a843cbb + 0cfe91f commit 20c71fd

27 files changed

+826
-241
lines changed

.Rbuildignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,8 @@
44
^ccdrAlgorithm\.Rproj$
55
^\.Rproj\.user$
66
^\.travis\.yml$
7+
inst/db
8+
man-roxygen
9+
^\.httr-oauth$
10+
^revdep$
11+
^codecov\.yml$

.travis.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,3 @@ r:
1212

1313
# BiocInstaller required for 'graph' package from Bioconductor
1414
bioc_required: true
15-
16-
# Install up-to-date dependencies from GitHub
17-
r_github_packages:
18-
- itsrainingdata/sparsebnUtils

DESCRIPTION

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
Package: ccdrAlgorithm
22
Title: CCDr Algorithm for Learning Sparse Gaussian Bayesian Networks
3-
Version: 0.0.1
4-
Date: 2016-08-08
5-
Authors@R: person("Bryon", "Aragam", email = "sparsebn@gmail.com", role = c("aut", "cre"))
3+
Version: 0.0.2
4+
Date: 2016-11-19
5+
Authors@R: c(
6+
person("Bryon", "Aragam", email = "sparsebn@gmail.com", role = c("aut", "cre")),
7+
person("Dacheng", "Zhang", role = c("aut"))
8+
)
69
Maintainer: Bryon Aragam <sparsebn@gmail.com>
710
Description: Implementation of the CCDr (Concave penalized Coordinate Descent with reparametrization) structure learning algorithm as described in Aragam and Zhou (2015) <http://www.jmlr.org/papers/v16/aragam15a.html>. This is a fast, score-based method for learning Bayesian networks that uses sparse regularization and block-cyclic coordinate descent.
811
Depends:

NAMESPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
# Generated by roxygen2: do not edit by hand
22

3+
S3method(edgeList,SparseBlockMatrixR)
4+
S3method(sparse,SparseBlockMatrixR)
35
export(ccdr.run)
46
importFrom(Rcpp,sourceCpp)
7+
importFrom(sparsebnUtils,edgeList)
58
importFrom(sparsebnUtils,get.adjacency.matrix)
69
importFrom(sparsebnUtils,is.zero)
710
importFrom(sparsebnUtils,num.edges)

NEWS.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# ccdrAlgorithm 0.0.2
2+
3+
## Features
4+
5+
* `ccdr.run()` is now compatible with interventional data
6+
7+
# ccdrAlgorithm 0.0.1
8+
9+
* Initial stable release
10+

R/RcppExports.R

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
# This file was generated by Rcpp::compileAttributes
1+
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
22
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
33

4-
gridCCDr <- function(cors, init_betas, nn, lambdas, params, verbose) {
5-
.Call('ccdrAlgorithm_gridCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nn, lambdas, params, verbose)
4+
gridCCDr <- function(cors, init_betas, nj, indexj, aj, lambdas, params, verbose) {
5+
.Call('ccdrAlgorithm_gridCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nj, indexj, aj, lambdas, params, verbose)
66
}
77

8-
singleCCDr <- function(cors, init_betas, nn, lambda, params, verbose) {
9-
.Call('ccdrAlgorithm_singleCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nn, lambda, params, verbose)
8+
singleCCDr <- function(cors, init_betas, nj, indexj, aj, lambda, params, verbose) {
9+
.Call('ccdrAlgorithm_singleCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nj, indexj, aj, lambda, params, verbose)
1010
}
1111

R/ccdrAlgorithm-functions.R

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
## returns TRUE if ivn_list is a list of vectors or NULL elements,
2+
check_if_ivn_list <- function(ivn) {
3+
## check if it is a list
4+
if(!is.list(ivn)) return(FALSE)
5+
6+
## check if every component is a vector of NULL
7+
return(all(sapply(ivn, is.vector) | sapply(ivn, is.null)))
8+
} # END CHECK_IF_IVN_LIST
9+
10+
## returns TRUE if ivn_list has length nn, the number of sample rows
11+
check_ivn_size <- function(ivn, data) {
12+
## check if length matches with nn
13+
return(length(ivn) == nrow(data))
14+
} # END CHECK_IF_IVN_SIZE
15+
16+
## returns TRUE if a vector component of 'ivn' is NULL,
17+
## or has all correct labels of nodes under intervention in this sample:
18+
## 1) integer, 2) between 1 and pp, and 3) no duplicates
19+
check_vector_label <- function(vec, pp) {
20+
21+
if(is.null(vec)) return(TRUE)
22+
23+
## Note: If a vector has only integers and NAs, is.integer returns all TRUE
24+
## e.g.: c(NA, 1L, NA, 3L, NA, 5L)
25+
## However, c(1L, NA, 3L, 4, NA) returns all FALSE
26+
## check if labels are integers
27+
if(any(is.na(vec)) || !is.integer(vec)) {
28+
stop("Non-integer label(s) found in one or more components in ivn.")
29+
return(FALSE)
30+
}
31+
32+
## check if labels are in 1..pp
33+
if(any(vec < 1) | any(vec > pp)) {
34+
stop(sprintf("Labels should all be between 1 and %d to refer to the columns of data.", pp))
35+
return(FALSE)
36+
}
37+
38+
## check if labels are unique
39+
if(anyDuplicated(vec)) {
40+
stop("Duplicated label(s) found in one component in ivn.")
41+
return(FALSE)
42+
}
43+
44+
return(TRUE)
45+
} # END CHECK_VECTOR_LABEL
46+
47+
## returns TRUE if every vector in 'ivn' is NULL,
48+
## or has correct labels: integer, between 1 and pp, and no duplicates
49+
check_ivn_label <- function(ivn, data) {
50+
sapply(ivn, check_vector_label, ncol(data))
51+
} # END CHECK_IVN_LABEL
52+

R/ccdrAlgorithm-main.R

Lines changed: 59 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,14 @@ ccdr.run <- function(data,
9292
### Check data format
9393
if(!sparsebnUtils::is.sparsebnData(data)) stop(sparsebnUtils::input_not_sparsebnData(data))
9494

95-
### Extract the data (CCDr only works on observational data, so ignore the intervention part)
95+
### Extract the data and ivn
96+
### CCDr now works on both observational data and interventional data, and a mixture of both
9697
data_matrix <- data$data
98+
ivn_list <- data$ivn
9799

98100
### Call the CCDr algorithm
99101
ccdr_call(data = data_matrix,
102+
ivn = ivn_list,
100103
betas = betas,
101104
lambdas = lambdas,
102105
lambdas.length = lambdas.length,
@@ -115,6 +118,7 @@ ccdr.run <- function(data,
115118
# this is handled internally by ccdr_gridR and ccdr_singleR.
116119
#
117120
ccdr_call <- function(data,
121+
ivn = NULL,
118122
betas,
119123
lambdas,
120124
lambdas.length,
@@ -149,6 +153,20 @@ ccdr_call <- function(data,
149153
nn <- as.integer(nrow(data))
150154
pp <- as.integer(ncol(data))
151155

156+
if(is.null(ivn)) ivn <- vector("list", nn) # to pass testthat for observational data cases
157+
### Check ivn
158+
if(!check_if_ivn_list(ivn)) stop("ivn must be a list of NULLs or vectors!")
159+
if(!check_ivn_size(ivn, data)) stop(sprintf("Length of ivn is %d, expected to match the number of rows in data: %d.", length(ivn), nn))
160+
check_ivn_label(ivn, data)
161+
### if(!check_ivn_label(ivn, data)) stop("Intervention labels are incorrect.")
162+
163+
### use a vector nj to count how many times each node is under intervention
164+
### refer to nj as "intervention times vector"
165+
nj <- rep(0, pp)
166+
for(j in 1:pp) { ## include 0 here or not?
167+
nj[j] <- sum(!sapply(lapply(ivn, is.element, j), any)) ## optimize for sorted column?
168+
}
169+
152170
### Use default values for lambda if not specified
153171
if(is.null(lambdas)){
154172
if(is.null(lambdas.length)){
@@ -188,6 +206,7 @@ ccdr_call <- function(data,
188206
# }
189207

190208
### By default, set the initial guess for betas to be all zeroes
209+
191210
if(missing(betas)){
192211
betas <- matrix(0, nrow = pp, ncol = pp)
193212
# betas <- SparseBlockMatrixR(betas) # 2015-03-26: Deprecated and replaced with .init_sbm below
@@ -197,7 +216,6 @@ ccdr_call <- function(data,
197216
# Still need to set start = 0, though.
198217
betas$start <- 0
199218
} # Type-checking for betas happens in ccdr_singleR
200-
201219
# This parameter can be set by the user, but in order to prevent the algorithm from taking too long to run
202220
# it is a good idea to keep the threshold used by default which is O(sqrt(pp))
203221
if(is.null(max.iters)){
@@ -207,12 +225,16 @@ ccdr_call <- function(data,
207225
t1.cor <- proc.time()[3]
208226
# cors <- cor(data)
209227
# cors <- cors[upper.tri(cors, diag = TRUE)]
210-
cors <- sparsebnUtils::cor_vector(data)
228+
corlist <- sparsebnUtils::cor_vector_ivn(data, ivn)
229+
cors <- corlist$cors
230+
indexj <- corlist$indexj
211231
t2.cor <- proc.time()[3]
212232

213233
fit <- ccdr_gridR(cors,
214234
as.integer(pp),
215235
as.integer(nn),
236+
as.integer(nj),
237+
as.integer(indexj),
216238
betas,
217239
as.numeric(lambdas),
218240
as.numeric(gamma),
@@ -245,6 +267,8 @@ ccdr_call <- function(data,
245267
# Main subroutine for running the CCDr algorithm on a grid of lambda values.
246268
ccdr_gridR <- function(cors,
247269
pp, nn,
270+
nj = NULL,
271+
indexj = NULL,
248272
betas,
249273
lambdas,
250274
gamma,
@@ -261,13 +285,21 @@ ccdr_gridR <- function(cors,
261285
### nlam is now set automatically
262286
nlam <- length(lambdas)
263287

288+
### Check indexj
289+
if(is.null(indexj)) indexj <- rep(0L, pp + 1)
290+
### Check nj
291+
if(is.null(nj)) nj <- as.integer(rep(nn, pp))
292+
264293
ccdr.out <- list()
265294
for(i in 1:nlam){
295+
266296
if(verbose) message("Working on lambda = ", round(lambdas[i], 5), " [", i, "/", nlam, "]")
267297

268298
t1.ccdr <- proc.time()[3]
269299
ccdr.out[[i]] <- ccdr_singleR(cors,
270300
pp, nn,
301+
nj,
302+
indexj,
271303
betas,
272304
lambdas[i],
273305
gamma = gamma,
@@ -304,6 +336,8 @@ ccdr_gridR <- function(cors,
304336
# called. Type-checking is strongly enforced here.
305337
ccdr_singleR <- function(cors,
306338
pp, nn,
339+
nj = NULL,
340+
indexj = NULL,
307341
betas,
308342
lambda,
309343
gamma,
@@ -313,9 +347,27 @@ ccdr_singleR <- function(cors,
313347
verbose = FALSE
314348
){
315349

350+
if(is.null(indexj)) indexj <- rep(0L, pp + 1)
351+
### Check indexj
352+
if(!is.vector(indexj)) stop("Index vector for cors is not a vector.")
353+
if(length(indexj) > pp + 1) stop(sprintf("Index vector for cors is too long, expected to be no greater than %d, the number of columns of data.", pp))
354+
if(!is.integer(indexj)) stop("Index vector for cors has non-integer component(s).")
355+
if(any(indexj < 0 | indexj > pp + 1)) stop(sprintf("Index vector for cors has out-of-range component(s), expected to be between 0 and %d.", pp))
356+
357+
if(is.null(nj)) nj <- as.integer(rep(nn, pp))
358+
### Check nj
359+
if(!is.vector(nj)) stop("Intervention times vector is not a vector.")
360+
if(length(nj) != pp) stop(sprintf("Length of intervention times vector is %d, expected %d% to match the number of columns of data", length(nj), pp))
361+
if(!is.integer(nj)) stop("Intervention times vector has non-integer component(s).")
362+
if(any(nj < 0 | nj > nn)) stop(sprintf("Intervention times vector has out-of-range component(s), expected to be between 0 and %d.", nn))
363+
364+
### add a weight a_j to penalty on beta_{ij}
365+
### since now with intervention data, beta_{ij} only appears n_j times out of total nn samples
366+
aj <- nj / nn
367+
316368
### Check cors
317369
if(!is.numeric(cors)) stop("cors must be a numeric vector!")
318-
if(length(cors) != pp*(pp+1)/2) stop(paste0("cors has incorrect length: Expected length = ", pp*(pp+1)/2, " input length = ", length(cors)))
370+
if(length(cors) != length(unique(indexj))*pp*(pp+1)/2) stop(paste0("cors has incorrect length: Expected length = ", length(unique(indexj))*pp*(pp+1)/2, " input length = ", length(cors)))
319371

320372
### Check dimension parameters
321373
if(!is.integer(pp) || !is.integer(nn)) stop("Both pp and nn must be integers!")
@@ -354,7 +406,9 @@ ccdr_singleR <- function(cors,
354406
t1.ccdr <- proc.time()[3]
355407
ccdr.out <- singleCCDr(cors,
356408
betas,
357-
nn,
409+
nj,
410+
indexj,
411+
aj,
358412
lambda,
359413
c(gamma, eps, maxIters, alpha),
360414
verbose = verbose)

R/s3-SparseBlockMatrixR.R

Lines changed: 21 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,10 @@ is.SparseBlockMatrixR <- function(x){
5656
inherits(x, "SparseBlockMatrixR")
5757
} # END IS.SPARSEBLOCKMATRIXR
5858

59+
as.SparseBlockMatrixR <- function(x){
60+
SparseBlockMatrixR(x) # NOTE: S3 delegation is implicitly handled by the constructor here
61+
}
62+
5963
#------------------------------------------------------------------------------#
6064
# reIndexC.SparseBlockMatrixR
6165
# Re-indexing TO C for SparseBlockMatrixR objects
@@ -201,30 +205,6 @@ SparseBlockMatrixR.matrix <- function(x, sigmas, ...){
201205
SparseBlockMatrixR(sparsebnUtils::as.sparse(x), sigmas, ...)
202206
} # END SPARSEBLOCKMATRIXR.MATRIX
203207

204-
#------------------------------------------------------------------------------#
205-
# as.SparseBlockMatrixR.list
206-
# Convert FROM list TO SparseBlockMatrixR
207-
#
208-
as.SparseBlockMatrixR.list <- function(x){
209-
SparseBlockMatrixR(x)
210-
} # END AS.SPARSEBLOCKMATRIXR.LIST
211-
212-
#------------------------------------------------------------------------------#
213-
# as.SparseBlockMatrixR.sparse
214-
# Convert FROM sparse TO SparseBlockMatrixR
215-
#
216-
as.SparseBlockMatrixR.sparse <- function(x){
217-
SparseBlockMatrixR(x)
218-
} # END AS.SPARSEBLOCKMATRIXR.SPARSE
219-
220-
#------------------------------------------------------------------------------#
221-
# as.SparseBlockMatrixR.matrix
222-
# Convert FROM matrix TO SparseBlockMatrixR
223-
#
224-
as.SparseBlockMatrixR.matrix <- function(x){
225-
SparseBlockMatrixR(x)
226-
} # END AS.SPARSEBLOCKMATRIXR.MATRIX
227-
228208
#------------------------------------------------------------------------------#
229209
# as.list.SparseBlockMatrixR
230210
# Convert FROM SparseBlockMatrixR TO list
@@ -261,10 +241,11 @@ as.matrix.SparseBlockMatrixR <- function(x){
261241
} # END AS.MATRIX.SPARSEBLOCKMATRIXR
262242

263243
#------------------------------------------------------------------------------#
264-
# as.edgeList.SparseBlockMatrixR
244+
# edgeList.SparseBlockMatrixR
265245
# Coerce SBM to edge list
266246
#
267-
as.edgeList.SparseBlockMatrixR <- function(x){
247+
#' @export
248+
edgeList.SparseBlockMatrixR <- function(x){
268249
#
269250
# We have to be careful in obtaining the edge list of a SparseBlockMatrixR object:
270251
# It is NOT the same as the rows slot since some of these components may have
@@ -278,12 +259,13 @@ as.edgeList.SparseBlockMatrixR <- function(x){
278259
el <- mapply(function(x, y){ y[which(abs(x) > sparsebnUtils::zero_threshold())]}, x$vals, x$rows)
279260

280261
sparsebnUtils::edgeList(el)
281-
} # AS.EDGELIST.SPARSEBLOCKMATRIXR
262+
} # EDGELIST.SPARSEBLOCKMATRIXR
282263

283264
#------------------------------------------------------------------------------#
284265
# sparse.SparseBlockMatrixR
285266
# 2016-01-22: Migrated to this file from s3-sparse.R
286267
#
268+
#' @export
287269
sparse.SparseBlockMatrixR <- function(x, index = "R", ...){
288270

289271
if(index != "R" && index != "C") stop("Invalid entry for index parameter: Must be either 'R' or 'C'!")
@@ -321,16 +303,16 @@ sparse.SparseBlockMatrixR <- function(x, index = "R", ...){
321303
}
322304
} # END SPARSE.SPARSEBLOCKMATRIXR
323305

324-
#------------------------------------------------------------------------------#
325-
# as.sparse.SparseBlockMatrixR
326-
# Convert FROM SparseBlockMatrixR TO sparse
327-
# By default, return the object using R indexing. If desired, the method can return C-style indexing by setting
328-
# index = "C".
329-
# 2016-01-22: Migrated to this file from s3-sparse.R
330-
#
331-
as.sparse.SparseBlockMatrixR <- function(x, index = "R", ...){
332-
sparse.SparseBlockMatrixR(x, index)
333-
} # END AS.SPARSE.SPARSEBLOCKMATRIXR
306+
# #------------------------------------------------------------------------------#
307+
# # as.sparse.SparseBlockMatrixR
308+
# # Convert FROM SparseBlockMatrixR TO sparse
309+
# # By default, return the object using R indexing. If desired, the method can return C-style indexing by setting
310+
# # index = "C".
311+
# # 2016-01-22: Migrated to this file from s3-sparse.R
312+
# #
313+
# as.sparse.SparseBlockMatrixR <- function(x, index = "R", ...){
314+
# sparse.SparseBlockMatrixR(x, index)
315+
# } # END AS.SPARSE.SPARSEBLOCKMATRIXR
334316

335317
# to_graphNEL.SparseBlockMatrixR
336318
# Convert SBM object to graphNEL object
@@ -347,7 +329,7 @@ to_graphNEL.SparseBlockMatrixR <- function(x){
347329
} # END TO_GRAPHNEL.SPARSEBLOCKMATRIXR
348330

349331
get.adjacency.matrix.SparseBlockMatrixR <- function(x){
350-
sparsebnUtils::get.adjacency.matrix(as.edgeList.SparseBlockMatrixR(x))
332+
sparsebnUtils::get.adjacency.matrix(sparsebnUtils::as.edgeList(x))
351333
} # END GET.ADJACENCY.MATRIX.SPARSEBLOCKMATRIXR
352334

353335
num.nodes.SparseBlockMatrixR <- function(x){
@@ -357,7 +339,7 @@ num.nodes.SparseBlockMatrixR <- function(x){
357339

358340
num.edges.SparseBlockMatrixR <- function(x){
359341
### The number of nodes should be exactly the same as the length of the rows list
360-
sparsebnUtils::num.edges(as.edgeList.SparseBlockMatrixR(x))
342+
sparsebnUtils::num.edges(sparsebnUtils::as.edgeList(x))
361343
} # END NUM.EDGES.SPARSEBLOCKMATRIXR
362344

363345
# This function is (so far) only used in unit tests

0 commit comments

Comments
 (0)