Skip to content

Commit abc37ea

Browse files
Merge branch 'dev'
2 parents d9a961a + 748c5a1 commit abc37ea

26 files changed

+807
-241
lines changed

DESCRIPTION

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: ccdrAlgorithm
22
Title: CCDr Algorithm for Learning Sparse Gaussian Bayesian Networks
3-
Version: 0.0.3
4-
Date: 2017-03-09
3+
Version: 0.0.4
4+
Date: 2017-09-09
55
Authors@R: c(
66
person("Bryon", "Aragam", email = "sparsebn@gmail.com", role = c("aut", "cre")),
77
person("Dacheng", "Zhang", role = c("aut"))
@@ -11,7 +11,7 @@ Description: Implementation of the CCDr (Concave penalized Coordinate Descent wi
1111
Depends:
1212
R (>= 3.2.3)
1313
Imports:
14-
sparsebnUtils (>= 0.0.4),
14+
sparsebnUtils (>= 0.0.5),
1515
Rcpp (>= 0.11.0),
1616
stats,
1717
utils

NEWS.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,14 @@
1+
# ccdrAlgorithm 0.0.4
2+
3+
## Features
4+
5+
* `ccdr.run` now supports black lists and white lists (sparsebn #6)
6+
* Users can now use prior knowledge of variances via the new `sigmas` argument to `ccdr.run` (sparsebn #12)
7+
8+
## Bug fixes
9+
10+
* `ivn.rand = FALSE` not handled correctly (#4)
11+
112
# ccdrAlgorithm 0.0.3
213

314
## Features

R/RcppExports.R

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
22
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
33

4-
gridCCDr <- function(cors, init_betas, nj, indexj, aj, lambdas, params, verbose) {
5-
.Call('ccdrAlgorithm_gridCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nj, indexj, aj, lambdas, params, verbose)
4+
gridCCDr <- function(cors, init_betas, init_sigmas, nj, indexj, aj, lambdas, weights, params, verbose) {
5+
.Call(`_ccdrAlgorithm_gridCCDr`, cors, init_betas, init_sigmas, nj, indexj, aj, lambdas, weights, params, verbose)
66
}
77

8-
singleCCDr <- function(cors, init_betas, nj, indexj, aj, lambda, params, verbose) {
9-
.Call('ccdrAlgorithm_singleCCDr', PACKAGE = 'ccdrAlgorithm', cors, init_betas, nj, indexj, aj, lambda, params, verbose)
8+
singleCCDr <- function(cors, init_betas, init_sigmas, nj, indexj, aj, lambda, weights, params, verbose) {
9+
.Call(`_ccdrAlgorithm_singleCCDr`, cors, init_betas, init_sigmas, nj, indexj, aj, lambda, weights, params, verbose)
1010
}
1111

R/ccdrAlgorithm-bwlist.R

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#
2+
# ccdrAlgorithm-bwlist.R
3+
# ccdrAlgorithm
4+
#
5+
# Created by Bryon Aragam (local) on 8/11/17.
6+
# Copyright (c) 2014-2017 Bryon Aragam. All rights reserved.
7+
#
8+
9+
#
10+
# PACKAGE CCDRALGORITHM: Helper methods for black/white lists
11+
#
12+
# CONTENTS:
13+
# names_to_indices
14+
# rows_to_list
15+
# bwlist_check
16+
# bwlist_to_weights
17+
#
18+
19+
### Just a wrapper for match with a better name
20+
names_to_indices <- function(v, names){
21+
match(v, names)
22+
} # END NAMES_TO_INDICES
23+
24+
### Returns a list whose components are the rows of a matrix
25+
rows_to_list <- function(m){
26+
lapply(1:nrow(m), function(j) m[j,])
27+
} # END ROWS_TO_LIST
28+
29+
### Check correctness of input and transform from matrix to list of indices
30+
bwlist_check <- function(bwlist, names){
31+
## Consistency checks
32+
if(!is.matrix(bwlist) || ncol(bwlist) != 2){
33+
stop("Input must be a matrix with exactly 2 columns!")
34+
}
35+
36+
if(any(is.na(bwlist))){
37+
stop("Input cannot have missing values!")
38+
}
39+
40+
### Convert characters names to indices
41+
if(is.character(bwlist)){
42+
bwlist <- as.vector(bwlist)
43+
bwlist <- names_to_indices(bwlist, names)
44+
bwlist <- matrix(bwlist, ncol = 2)
45+
}
46+
47+
storage.mode(bwlist) <- "integer" # This is important in ccdr_call to check overlap between blacklist and whitelist, fails if numerics are mixed with ints
48+
rows_to_list(bwlist)
49+
} # END BWLIST_CHECK
50+
51+
### Convert b/w lists to weight matrix of {-1,0,1}
52+
# -1 = black listed (guaranteed to be absent / zero)
53+
# 0 = white listed (guaranteed to be present / nonzero)
54+
# 1 = gray listed (may or may not be final model)
55+
bwlist_to_weights <- function(black, white, nnode){
56+
weights <- matrix(1L, ncol = nnode, nrow = nnode)
57+
58+
if(!is.null(white)){
59+
for(k in 1:length(white)){
60+
weights[white[[k]][1], white[[k]][2]] <- 0L
61+
}
62+
}
63+
64+
if(!is.null(black)){
65+
for(k in 1:length(black)){
66+
weights[black[[k]][1], black[[k]][2]] <- -1L
67+
}
68+
}
69+
70+
weights
71+
} # END BWLIST_TO_WEIGHTS

R/ccdrAlgorithm-main.R

Lines changed: 100 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -35,23 +35,36 @@ NULL
3535
#' This implementation includes two options for the penalty: (1) MCP, and (2) L1 (or Lasso). This option
3636
#' is controlled by the \code{gamma} argument.
3737
#'
38-
#' @param data Data as \code{\link[sparsebnUtils]{sparsebnData}}. Must be numeric and contain no missing values.
39-
#' @param betas Initial guess for the algorithm. Represents the weighted adjacency matrix
40-
#' of a DAG where the algorithm will begin searching for an optimal structure.
41-
#' @param lambdas (optional) Numeric vector containing a grid of lambda values (i.e. regularization
38+
#' @param data Data as \code{\link[sparsebnUtils]{sparsebnData}} object. Must be numeric and contain no missing values.
39+
#' @param lambdas Numeric vector containing a grid of lambda values (i.e. regularization
4240
#' parameters) to use in the solution path. If missing, a default grid of values will be
4341
#' used based on a decreasing log-scale (see also \link{generate.lambdas}).
4442
#' @param lambdas.length Integer number of values to include in the solution path. If \code{lambdas}
4543
#' has also been specified, this value will be ignored. Note also that the final
4644
#' solution path may contain fewer estimates (see
4745
#' \code{alpha}).
46+
#' @param whitelist A two-column matrix of edges that are guaranteed to be in each
47+
#' estimate (a "white list"). Each row in this matrix corresponds
48+
#' to an edge that is to be whitelisted. These edges can be
49+
#' specified by node name (as a \code{character} matrix), or by
50+
#' index (as a \code{numeric} matrix).
51+
#' @param blacklist A two-column matrix of edges that are guaranteed to be absent
52+
#' from each estimate (a "black list"). See argument
53+
#' "\code{whitelist}" above for more details.
4854
#' @param gamma Value of concavity parameter. If \code{gamma > 0}, then the MCP will be used
4955
#' with \code{gamma} as the concavity parameter. If \code{gamma < 0}, then the L1 penalty
5056
#' will be used and this value is otherwise ignored.
5157
#' @param error.tol Error tolerance for the algorithm, used to test for convergence.
5258
#' @param max.iters Maximum number of iterations for each internal sweep.
5359
#' @param alpha Threshold parameter used to terminate the algorithm whenever the number of edges in the
5460
#' current DAG estimate is \code{> alpha * ncol(data)}.
61+
#' @param betas Initial guess for the algorithm. Represents the weighted adjacency matrix
62+
#' of a DAG where the algorithm will begin searching for an optimal structure.
63+
#' @param sigmas Numeric vector of known values of conditional variances for each node in the network. If this is
64+
#' set by the user, these parameters will not be computed and the input will
65+
#' be used as the "true" values of the variances in the algorithm. Note that setting
66+
#' this to be all ones (i.e. \code{sigmas[j] = 1} for all \code{j}) is
67+
#' equivalent to using the least-squares loss.
5568
#' @param verbose \code{TRUE / FALSE} whether or not to print out progress and summary reports.
5669
#'
5770
#' @return A \code{\link[sparsebnUtils]{sparsebnPath}} object.
@@ -77,13 +90,16 @@ NULL
7790
#'
7891
#' @export
7992
ccdr.run <- function(data,
80-
betas,
8193
lambdas = NULL,
8294
lambdas.length = NULL,
95+
whitelist = NULL,
96+
blacklist = NULL,
8397
gamma = 2.0,
8498
error.tol = 1e-4,
8599
max.iters = NULL,
86100
alpha = 10,
101+
betas,
102+
sigmas = NULL,
87103
verbose = FALSE
88104
){
89105
### Check data format
@@ -94,12 +110,24 @@ ccdr.run <- function(data,
94110
data_matrix <- data$data
95111
ivn_list <- data$ivn
96112

113+
### If ivn_list contains character names, convert to indices
114+
if("character" %in% sparsebnUtils::list_classes(ivn_list)){
115+
ivn_list <- lapply(ivn_list, function(x){
116+
idx <- match(x, names(data_matrix))
117+
if(length(idx) == 0) NULL # return NULL if no match (=> observational)
118+
else idx
119+
})
120+
}
121+
97122
### Call the CCDr algorithm
98123
ccdr_call(data = data_matrix,
99124
ivn = ivn_list,
100125
betas = betas,
126+
sigmas = sigmas,
101127
lambdas = lambdas,
102128
lambdas.length = lambdas.length,
129+
whitelist = whitelist,
130+
blacklist = blacklist,
103131
gamma = gamma,
104132
error.tol = error.tol,
105133
rlam = NULL,
@@ -120,15 +148,19 @@ MAX_CCS_ARRAY_SIZE <- function() 10000
120148
ccdr_call <- function(data,
121149
ivn = NULL,
122150
betas,
151+
sigmas,
123152
lambdas,
124153
lambdas.length,
154+
whitelist = NULL,
155+
blacklist = NULL,
125156
gamma,
126157
error.tol,
127158
rlam,
128159
max.iters,
129160
alpha,
130161
verbose = FALSE
131162
){
163+
node_names <- names(data)
132164
# ### Allow users to input a data.frame, but kindly warn them about doing this
133165
# if(is.data.frame(data)){
134166
# warning(sparsebnUtils::alg_input_data_frame())
@@ -171,6 +203,11 @@ ccdr_call <- function(data,
171203
nj[j] <- sum(!sapply(lapply(ivn, is.element, j), any)) ## optimize for sorted column?
172204
}
173205

206+
### Set default for sigmas (negative values => ignore initial value and update as usual)
207+
if(is.null(sigmas)){
208+
sigmas <- rep(-1., pp)
209+
}
210+
174211
### Use default values for lambda if not specified
175212
if(is.null(lambdas)){
176213
if(is.null(lambdas.length)){
@@ -227,6 +264,23 @@ ccdr_call <- function(data,
227264
max.iters <- sparsebnUtils::default_max_iters(pp)
228265
}
229266

267+
### White/black lists
268+
# Be careful about handling various NULL cases
269+
if(!is.null(whitelist)) whitelist <- bwlist_check(whitelist, node_names)
270+
if(!is.null(blacklist)) blacklist <- bwlist_check(blacklist, node_names)
271+
272+
if(!is.null(whitelist) && !is.null(blacklist)){
273+
if(length(intersect(whitelist, blacklist)) > 0){
274+
badinput <- vapply(intersect(whitelist, blacklist), function(x) sprintf("\t[%s]\n", paste(x, collapse = ",")), FUN.VALUE = "vector")
275+
badinput <- paste(badinput, collapse = "")
276+
msg <- sprintf("Duplicate entries found in blacklist and whitelist: \n%s", badinput)
277+
stop(msg)
278+
}
279+
}
280+
281+
weights <- bwlist_to_weights(blacklist, whitelist, nnode = pp)
282+
283+
### Pre-process correlation data
230284
t1.cor <- proc.time()[3]
231285
# cors <- cor(data)
232286
# cors <- cors[upper.tri(cors, diag = TRUE)]
@@ -241,7 +295,9 @@ ccdr_call <- function(data,
241295
as.integer(nj),
242296
as.integer(indexj),
243297
betas,
298+
as.numeric(sigmas),
244299
as.numeric(lambdas),
300+
as.integer(weights),
245301
as.numeric(gamma),
246302
as.numeric(error.tol),
247303
as.integer(max.iters),
@@ -260,7 +316,7 @@ ccdr_call <- function(data,
260316
names(fit[[k]]$edges) <- names(data)
261317

262318
### Add node names to output
263-
fit[[k]] <- append(fit[[k]], list(names(data)), after = 1) # insert node names into second slot
319+
fit[[k]] <- append(fit[[k]], list(node_names), after = 1) # insert node names into second slot
264320
names(fit[[k]])[2] <- "nodes"
265321
}
266322

@@ -276,7 +332,9 @@ ccdr_gridR <- function(cors,
276332
nj = NULL,
277333
indexj = NULL,
278334
betas,
335+
sigmas,
279336
lambdas,
337+
weights,
280338
gamma,
281339
eps,
282340
maxIters,
@@ -307,7 +365,9 @@ ccdr_gridR <- function(cors,
307365
nj,
308366
indexj,
309367
betas,
368+
sigmas,
310369
lambdas[i],
370+
weights,
311371
gamma = gamma,
312372
eps = eps,
313373
maxIters = maxIters,
@@ -345,39 +405,46 @@ ccdr_singleR <- function(cors,
345405
nj = NULL,
346406
indexj = NULL,
347407
betas,
408+
sigmas,
348409
lambda,
410+
weights,
349411
gamma,
350412
eps,
351413
maxIters,
352414
alpha, # 2-9-15: No longer necessary in ccdr_singleR, but needed since the C++ call asks for it
353415
verbose = FALSE
354416
){
355417

356-
if(is.null(indexj)) indexj <- rep(0L, pp + 1)
418+
### Check dimension parameters
419+
if(!is.integer(pp) || !is.integer(nn)) stop("Both pp and nn must be integers!")
420+
if(pp <= 0 || nn <= 0) stop("Both pp and nn must be positive!")
421+
422+
### These variables, if NULL, need to be initialized before checking anything
423+
if(is.null(indexj)) indexj <- rep(0L, pp + 1) # initialize indexj
424+
if(is.null(nj)) nj <- as.integer(rep(nn, pp)) # initialize nj
425+
357426
### Check indexj
358427
if(!is.vector(indexj)) stop("Index vector for cors is not a vector.")
359428
if(length(indexj) > pp + 1) stop(sprintf("Index vector for cors is too long, expected to be no greater than %d, the number of columns of data.", pp))
360429
if(!is.integer(indexj)) stop("Index vector for cors has non-integer component(s).")
430+
if(any(is.na(indexj) | is.null(indexj))) stop("Index vector cannot have missing or NULL values.")
361431
if(any(indexj < 0 | indexj > pp + 1)) stop(sprintf("Index vector for cors has out-of-range component(s), expected to be between 0 and %d.", pp))
362432

363-
if(is.null(nj)) nj <- as.integer(rep(nn, pp))
364433
### Check nj
365434
if(!is.vector(nj)) stop("Intervention times vector is not a vector.")
366-
if(length(nj) != pp) stop(sprintf("Length of intervention times vector is %d, expected %d% to match the number of columns of data", length(nj), pp))
435+
if(length(nj) != pp) stop(sprintf("Length of intervention times vector is %d, expected to match the number of columns of data = %d", length(nj), pp))
367436
if(!is.integer(nj)) stop("Intervention times vector has non-integer component(s).")
437+
if(any(is.na(nj) | is.null(nj))) stop("Intervention times vector cannot have missing or NULL values.")
368438
if(any(nj < 0 | nj > nn)) stop(sprintf("Intervention times vector has out-of-range component(s), expected to be between 0 and %d.", nn))
369439

370-
### add a weight a_j to penalty on beta_{ij}
371-
### since now with intervention data, beta_{ij} only appears n_j times out of total nn samples
372-
aj <- nj / nn
373-
374440
### Check cors
441+
### This check must come after the checks for indexj, nj since these values are used to check cors
375442
if(!is.numeric(cors)) stop("cors must be a numeric vector!")
376443
if(length(cors) != length(unique(indexj))*pp*(pp+1)/2) stop(paste0("cors has incorrect length: Expected length = ", length(unique(indexj))*pp*(pp+1)/2, " input length = ", length(cors)))
377444

378-
### Check dimension parameters
379-
if(!is.integer(pp) || !is.integer(nn)) stop("Both pp and nn must be integers!")
380-
if(pp <= 0 || nn <= 0) stop("Both pp and nn must be positive!")
445+
### add a weight a_j to penalty on beta_{ij}
446+
### since now with intervention data, beta_{ij} only appears n_j times out of total nn samples
447+
aj <- nj / nn
381448

382449
### Check betas
383450
if(sparsebnUtils::check_if_matrix(betas)){ # if the input is a matrix, convert to SBM object
@@ -387,10 +454,25 @@ ccdr_singleR <- function(cors,
387454
stop("Incompatible data passed for betas parameter: Should be either matrix or list in SparseBlockMatrixR format.")
388455
}
389456

457+
### Check sigmas
458+
if(!is.numeric(sigmas)) stop("sigmas must be numeric!")
459+
if(length(sigmas) != pp) stop(sprintf("sigmas must have length = %d!", pp))
460+
if(any(sigmas < 0)){
461+
# -1 is a sentinel value for updating sigmas via the CD updates
462+
if(any(sigmas != -1.)){
463+
stop("sigmas must be > 0!")
464+
}
465+
}
466+
390467
### Check lambda
391468
if(!is.numeric(lambda)) stop("lambda must be numeric!")
392469
if(lambda < 0) stop("lambda must be >= 0!")
393470

471+
### Check weights
472+
if(length(weights) != pp*pp) stop(sprintf("weights must have length p^2 = %d!", pp*pp))
473+
if(!is.numeric(weights)) stop("weights must be numeric!")
474+
if(weights < -1 || weights > 1) stop("weights out of bounds!")
475+
394476
### Check gamma
395477
if(!is.numeric(gamma)) stop("gamma must be numeric!")
396478
if(gamma < 0 && gamma != -1) stop("gamma must be >= 0 (MCP) or = -1 (Lasso)!")
@@ -412,10 +494,12 @@ ccdr_singleR <- function(cors,
412494
t1.ccdr <- proc.time()[3]
413495
ccdr.out <- singleCCDr(cors,
414496
betas,
497+
sigmas,
415498
nj,
416499
indexj,
417500
aj,
418501
lambda,
502+
weights,
419503
c(gamma, eps, maxIters, alpha),
420504
verbose = verbose)
421505
t2.ccdr <- proc.time()[3]

0 commit comments

Comments
 (0)