35
35
# ' This implementation includes two options for the penalty: (1) MCP, and (2) L1 (or Lasso). This option
36
36
# ' is controlled by the \code{gamma} argument.
37
37
# '
38
- # ' @param data Data as \code{\link[sparsebnUtils]{sparsebnData}}. Must be numeric and contain no missing values.
39
- # ' @param betas Initial guess for the algorithm. Represents the weighted adjacency matrix
40
- # ' of a DAG where the algorithm will begin searching for an optimal structure.
41
- # ' @param lambdas (optional) Numeric vector containing a grid of lambda values (i.e. regularization
38
+ # ' @param data Data as \code{\link[sparsebnUtils]{sparsebnData}} object. Must be numeric and contain no missing values.
39
+ # ' @param lambdas Numeric vector containing a grid of lambda values (i.e. regularization
42
40
# ' parameters) to use in the solution path. If missing, a default grid of values will be
43
41
# ' used based on a decreasing log-scale (see also \link{generate.lambdas}).
44
42
# ' @param lambdas.length Integer number of values to include in the solution path. If \code{lambdas}
45
43
# ' has also been specified, this value will be ignored. Note also that the final
46
44
# ' solution path may contain fewer estimates (see
47
45
# ' \code{alpha}).
46
+ # ' @param whitelist A two-column matrix of edges that are guaranteed to be in each
47
+ # ' estimate (a "white list"). Each row in this matrix corresponds
48
+ # ' to an edge that is to be whitelisted. These edges can be
49
+ # ' specified by node name (as a \code{character} matrix), or by
50
+ # ' index (as a \code{numeric} matrix).
51
+ # ' @param blacklist A two-column matrix of edges that are guaranteed to be absent
52
+ # ' from each estimate (a "black list"). See argument
53
+ # ' "\code{whitelist}" above for more details.
48
54
# ' @param gamma Value of concavity parameter. If \code{gamma > 0}, then the MCP will be used
49
55
# ' with \code{gamma} as the concavity parameter. If \code{gamma < 0}, then the L1 penalty
50
56
# ' will be used and this value is otherwise ignored.
51
57
# ' @param error.tol Error tolerance for the algorithm, used to test for convergence.
52
58
# ' @param max.iters Maximum number of iterations for each internal sweep.
53
59
# ' @param alpha Threshold parameter used to terminate the algorithm whenever the number of edges in the
54
60
# ' current DAG estimate is \code{> alpha * ncol(data)}.
61
+ # ' @param betas Initial guess for the algorithm. Represents the weighted adjacency matrix
62
+ # ' of a DAG where the algorithm will begin searching for an optimal structure.
63
+ # ' @param sigmas Numeric vector of known values of conditional variances for each node in the network. If this is
64
+ # ' set by the user, these parameters will not be computed and the input will
65
+ # ' be used as the "true" values of the variances in the algorithm. Note that setting
66
+ # ' this to be all ones (i.e. \code{sigmas[j] = 1} for all \code{j}) is
67
+ # ' equivalent to using the least-squares loss.
55
68
# ' @param verbose \code{TRUE / FALSE} whether or not to print out progress and summary reports.
56
69
# '
57
70
# ' @return A \code{\link[sparsebnUtils]{sparsebnPath}} object.
77
90
# '
78
91
# ' @export
79
92
ccdr.run <- function (data ,
80
- betas ,
81
93
lambdas = NULL ,
82
94
lambdas.length = NULL ,
95
+ whitelist = NULL ,
96
+ blacklist = NULL ,
83
97
gamma = 2.0 ,
84
98
error.tol = 1e-4 ,
85
99
max.iters = NULL ,
86
100
alpha = 10 ,
101
+ betas ,
102
+ sigmas = NULL ,
87
103
verbose = FALSE
88
104
){
89
105
# ## Check data format
@@ -94,12 +110,24 @@ ccdr.run <- function(data,
94
110
data_matrix <- data $ data
95
111
ivn_list <- data $ ivn
96
112
113
+ # ## If ivn_list contains character names, convert to indices
114
+ if (" character" %in% sparsebnUtils :: list_classes(ivn_list )){
115
+ ivn_list <- lapply(ivn_list , function (x ){
116
+ idx <- match(x , names(data_matrix ))
117
+ if (length(idx ) == 0 ) NULL # return NULL if no match (=> observational)
118
+ else idx
119
+ })
120
+ }
121
+
97
122
# ## Call the CCDr algorithm
98
123
ccdr_call(data = data_matrix ,
99
124
ivn = ivn_list ,
100
125
betas = betas ,
126
+ sigmas = sigmas ,
101
127
lambdas = lambdas ,
102
128
lambdas.length = lambdas.length ,
129
+ whitelist = whitelist ,
130
+ blacklist = blacklist ,
103
131
gamma = gamma ,
104
132
error.tol = error.tol ,
105
133
rlam = NULL ,
@@ -120,15 +148,19 @@ MAX_CCS_ARRAY_SIZE <- function() 10000
120
148
ccdr_call <- function (data ,
121
149
ivn = NULL ,
122
150
betas ,
151
+ sigmas ,
123
152
lambdas ,
124
153
lambdas.length ,
154
+ whitelist = NULL ,
155
+ blacklist = NULL ,
125
156
gamma ,
126
157
error.tol ,
127
158
rlam ,
128
159
max.iters ,
129
160
alpha ,
130
161
verbose = FALSE
131
162
){
163
+ node_names <- names(data )
132
164
# ### Allow users to input a data.frame, but kindly warn them about doing this
133
165
# if(is.data.frame(data)){
134
166
# warning(sparsebnUtils::alg_input_data_frame())
@@ -171,6 +203,11 @@ ccdr_call <- function(data,
171
203
nj [j ] <- sum(! sapply(lapply(ivn , is.element , j ), any )) # # optimize for sorted column?
172
204
}
173
205
206
+ # ## Set default for sigmas (negative values => ignore initial value and update as usual)
207
+ if (is.null(sigmas )){
208
+ sigmas <- rep(- 1 . , pp )
209
+ }
210
+
174
211
# ## Use default values for lambda if not specified
175
212
if (is.null(lambdas )){
176
213
if (is.null(lambdas.length )){
@@ -227,6 +264,23 @@ ccdr_call <- function(data,
227
264
max.iters <- sparsebnUtils :: default_max_iters(pp )
228
265
}
229
266
267
+ # ## White/black lists
268
+ # Be careful about handling various NULL cases
269
+ if (! is.null(whitelist )) whitelist <- bwlist_check(whitelist , node_names )
270
+ if (! is.null(blacklist )) blacklist <- bwlist_check(blacklist , node_names )
271
+
272
+ if (! is.null(whitelist ) && ! is.null(blacklist )){
273
+ if (length(intersect(whitelist , blacklist )) > 0 ){
274
+ badinput <- vapply(intersect(whitelist , blacklist ), function (x ) sprintf(" \t [%s]\n " , paste(x , collapse = " ," )), FUN.VALUE = " vector" )
275
+ badinput <- paste(badinput , collapse = " " )
276
+ msg <- sprintf(" Duplicate entries found in blacklist and whitelist: \n %s" , badinput )
277
+ stop(msg )
278
+ }
279
+ }
280
+
281
+ weights <- bwlist_to_weights(blacklist , whitelist , nnode = pp )
282
+
283
+ # ## Pre-process correlation data
230
284
t1.cor <- proc.time()[3 ]
231
285
# cors <- cor(data)
232
286
# cors <- cors[upper.tri(cors, diag = TRUE)]
@@ -241,7 +295,9 @@ ccdr_call <- function(data,
241
295
as.integer(nj ),
242
296
as.integer(indexj ),
243
297
betas ,
298
+ as.numeric(sigmas ),
244
299
as.numeric(lambdas ),
300
+ as.integer(weights ),
245
301
as.numeric(gamma ),
246
302
as.numeric(error.tol ),
247
303
as.integer(max.iters ),
@@ -260,7 +316,7 @@ ccdr_call <- function(data,
260
316
names(fit [[k ]]$ edges ) <- names(data )
261
317
262
318
# ## Add node names to output
263
- fit [[k ]] <- append(fit [[k ]], list (names( data ) ), after = 1 ) # insert node names into second slot
319
+ fit [[k ]] <- append(fit [[k ]], list (node_names ), after = 1 ) # insert node names into second slot
264
320
names(fit [[k ]])[2 ] <- " nodes"
265
321
}
266
322
@@ -276,7 +332,9 @@ ccdr_gridR <- function(cors,
276
332
nj = NULL ,
277
333
indexj = NULL ,
278
334
betas ,
335
+ sigmas ,
279
336
lambdas ,
337
+ weights ,
280
338
gamma ,
281
339
eps ,
282
340
maxIters ,
@@ -307,7 +365,9 @@ ccdr_gridR <- function(cors,
307
365
nj ,
308
366
indexj ,
309
367
betas ,
368
+ sigmas ,
310
369
lambdas [i ],
370
+ weights ,
311
371
gamma = gamma ,
312
372
eps = eps ,
313
373
maxIters = maxIters ,
@@ -345,39 +405,46 @@ ccdr_singleR <- function(cors,
345
405
nj = NULL ,
346
406
indexj = NULL ,
347
407
betas ,
408
+ sigmas ,
348
409
lambda ,
410
+ weights ,
349
411
gamma ,
350
412
eps ,
351
413
maxIters ,
352
414
alpha , # 2-9-15: No longer necessary in ccdr_singleR, but needed since the C++ call asks for it
353
415
verbose = FALSE
354
416
){
355
417
356
- if (is.null(indexj )) indexj <- rep(0L , pp + 1 )
418
+ # ## Check dimension parameters
419
+ if (! is.integer(pp ) || ! is.integer(nn )) stop(" Both pp and nn must be integers!" )
420
+ if (pp < = 0 || nn < = 0 ) stop(" Both pp and nn must be positive!" )
421
+
422
+ # ## These variables, if NULL, need to be initialized before checking anything
423
+ if (is.null(indexj )) indexj <- rep(0L , pp + 1 ) # initialize indexj
424
+ if (is.null(nj )) nj <- as.integer(rep(nn , pp )) # initialize nj
425
+
357
426
# ## Check indexj
358
427
if (! is.vector(indexj )) stop(" Index vector for cors is not a vector." )
359
428
if (length(indexj ) > pp + 1 ) stop(sprintf(" Index vector for cors is too long, expected to be no greater than %d, the number of columns of data." , pp ))
360
429
if (! is.integer(indexj )) stop(" Index vector for cors has non-integer component(s)." )
430
+ if (any(is.na(indexj ) | is.null(indexj ))) stop(" Index vector cannot have missing or NULL values." )
361
431
if (any(indexj < 0 | indexj > pp + 1 )) stop(sprintf(" Index vector for cors has out-of-range component(s), expected to be between 0 and %d." , pp ))
362
432
363
- if (is.null(nj )) nj <- as.integer(rep(nn , pp ))
364
433
# ## Check nj
365
434
if (! is.vector(nj )) stop(" Intervention times vector is not a vector." )
366
- if (length(nj ) != pp ) stop(sprintf(" Length of intervention times vector is %d, expected %d% to match the number of columns of data" , length(nj ), pp ))
435
+ if (length(nj ) != pp ) stop(sprintf(" Length of intervention times vector is %d, expected to match the number of columns of data = %d " , length(nj ), pp ))
367
436
if (! is.integer(nj )) stop(" Intervention times vector has non-integer component(s)." )
437
+ if (any(is.na(nj ) | is.null(nj ))) stop(" Intervention times vector cannot have missing or NULL values." )
368
438
if (any(nj < 0 | nj > nn )) stop(sprintf(" Intervention times vector has out-of-range component(s), expected to be between 0 and %d." , nn ))
369
439
370
- # ## add a weight a_j to penalty on beta_{ij}
371
- # ## since now with intervention data, beta_{ij} only appears n_j times out of total nn samples
372
- aj <- nj / nn
373
-
374
440
# ## Check cors
441
+ # ## This check must come after the checks for indexj, nj since these values are used to check cors
375
442
if (! is.numeric(cors )) stop(" cors must be a numeric vector!" )
376
443
if (length(cors ) != length(unique(indexj ))* pp * (pp + 1 )/ 2 ) stop(paste0(" cors has incorrect length: Expected length = " , length(unique(indexj ))* pp * (pp + 1 )/ 2 , " input length = " , length(cors )))
377
444
378
- # ## Check dimension parameters
379
- if ( ! is.integer( pp ) || ! is.integer( nn )) stop( " Both pp and nn must be integers! " )
380
- if ( pp < = 0 || nn < = 0 ) stop( " Both pp and nn must be positive! " )
445
+ # ## add a weight a_j to penalty on beta_{ij}
446
+ # ## since now with intervention data, beta_{ij} only appears n_j times out of total nn samples
447
+ aj <- nj / nn
381
448
382
449
# ## Check betas
383
450
if (sparsebnUtils :: check_if_matrix(betas )){ # if the input is a matrix, convert to SBM object
@@ -387,10 +454,25 @@ ccdr_singleR <- function(cors,
387
454
stop(" Incompatible data passed for betas parameter: Should be either matrix or list in SparseBlockMatrixR format." )
388
455
}
389
456
457
+ # ## Check sigmas
458
+ if (! is.numeric(sigmas )) stop(" sigmas must be numeric!" )
459
+ if (length(sigmas ) != pp ) stop(sprintf(" sigmas must have length = %d!" , pp ))
460
+ if (any(sigmas < 0 )){
461
+ # -1 is a sentinel value for updating sigmas via the CD updates
462
+ if (any(sigmas != - 1 . )){
463
+ stop(" sigmas must be > 0!" )
464
+ }
465
+ }
466
+
390
467
# ## Check lambda
391
468
if (! is.numeric(lambda )) stop(" lambda must be numeric!" )
392
469
if (lambda < 0 ) stop(" lambda must be >= 0!" )
393
470
471
+ # ## Check weights
472
+ if (length(weights ) != pp * pp ) stop(sprintf(" weights must have length p^2 = %d!" , pp * pp ))
473
+ if (! is.numeric(weights )) stop(" weights must be numeric!" )
474
+ if (weights < - 1 || weights > 1 ) stop(" weights out of bounds!" )
475
+
394
476
# ## Check gamma
395
477
if (! is.numeric(gamma )) stop(" gamma must be numeric!" )
396
478
if (gamma < 0 && gamma != - 1 ) stop(" gamma must be >= 0 (MCP) or = -1 (Lasso)!" )
@@ -412,10 +494,12 @@ ccdr_singleR <- function(cors,
412
494
t1.ccdr <- proc.time()[3 ]
413
495
ccdr.out <- singleCCDr(cors ,
414
496
betas ,
497
+ sigmas ,
415
498
nj ,
416
499
indexj ,
417
500
aj ,
418
501
lambda ,
502
+ weights ,
419
503
c(gamma , eps , maxIters , alpha ),
420
504
verbose = verbose )
421
505
t2.ccdr <- proc.time()[3 ]
0 commit comments