Skip to content

Commit fa4fb8c

Browse files
committed
fixing missing dependency error
1 parent 21f9732 commit fa4fb8c

10 files changed

+117
-111
lines changed

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ License: MIT + file LICENSE
1313
Encoding: UTF-8
1414
LazyData: false
1515
Depends: R (>= 3.6), data.table (>= 1.12.4), SummarizedExperiment
16-
Imports: rtracklayer, DelayedArray, HDF5Array, BSgenome, DelayedMatrixStats, parallel, methods, ggplot2, matrixStats, graphics, stats, utils, GenomicRanges, IRanges
16+
Imports: rtracklayer, DelayedArray, HDF5Array, BSgenome, DelayedMatrixStats, parallel, methods, ggplot2, S4Vectors, matrixStats, graphics, stats, utils, GenomicRanges, IRanges
1717
RoxygenNote: 7.1.1
1818
Suggests:
1919
knitr,
@@ -24,6 +24,7 @@ Suggests:
2424
BSgenome.Mmusculus.UCSC.mm9,
2525
MafDb.1Kgenomes.phase3.GRCh38,
2626
MafDb.1Kgenomes.phase3.hs37d5,
27+
BSgenome.Hsapiens.UCSC.hg19,
2728
GenomicScores,
2829
Biostrings,
2930
RColorBrewer,

NAMESPACE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,5 @@ importFrom(stats,prcomp)
5959
importFrom(stats,quantile)
6060
importFrom(stats,sd)
6161
importFrom(utils,browseURL)
62+
importFrom(S4Vectors,metadata)
63+
importFrom("graphics", "barplot", "points")

R/accessory_funcs.R

Lines changed: 46 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ get_source_idx = function(protocol = NULL) {
2929
col_names = c("chr", "start", "M", "U"),
3030
fix_missing = c("cov := M+U", "beta := M/cov"), select= TRUE))
3131
} else {
32-
# Bismark
32+
# Bismark
3333
return(list(col_idx = list(character = 1, numeric = 2, numeric = 4, numeric = 5, numeric = 6),
3434
col_names = c("chr", "start", "beta", "M", "U"),
3535
fix_missing = c("cov := M+U"), select= TRUE))
@@ -157,7 +157,7 @@ read_bdg = function(bdg, col_list = NULL, genome = NULL, verbose = TRUE,
157157
strand_collapse = FALSE, fill_cpgs = TRUE,
158158
contigs = contigs, synced_coordinates = synced_coordinates,
159159
file_uncovered = NULL, zero_based = TRUE) {
160-
160+
161161
chr <- M <- U <- . <- NULL
162162
message(paste0("-Processing: ", basename(bdg)))
163163
if(col_list$select){
@@ -168,14 +168,14 @@ read_bdg = function(bdg, col_list = NULL, genome = NULL, verbose = TRUE,
168168
colClasses = col_list$col_classes,
169169
verbose = FALSE,
170170
showProgress = FALSE))
171-
171+
172172
colnames(bdg_dat)[col_list$col_idx] = names(col_list$col_idx)
173173
bdg_dat[, `:=`(chr, as.character(chr))]
174174
bdg_dat[, `:=`(start, as.integer(start))]
175175
}
176-
177-
178-
176+
177+
178+
179179
if ("beta" %in% colnames(bdg_dat)) {
180180
if (nrow(bdg_dat) < 1000) {
181181
sample_row_idx = 1:nrow(bdg_dat)
@@ -196,19 +196,19 @@ read_bdg = function(bdg, col_list = NULL, genome = NULL, verbose = TRUE,
196196
}
197197
gc(verbose = FALSE)
198198
}
199-
199+
200200
if (!is.null(col_list$fix_missing)) {
201201
for (cmd in col_list$fix_missing) {
202202
bdg_dat[, eval(parse(text = cmd))]
203203
}
204204
}
205-
206-
205+
206+
207207
if (zero_based) {
208208
# Bring bedgraphs to 1-based coordinates
209209
bdg_dat[, `:=`(start, start + 1)]
210210
}
211-
211+
212212
# Check for contig prefixes and add them if necessary
213213
if (nrow(bdg_dat) < 1000) {
214214
sample_row_idx = sample(x = seq_len(nrow(bdg_dat)),
@@ -227,33 +227,33 @@ read_bdg = function(bdg, col_list = NULL, genome = NULL, verbose = TRUE,
227227
stop("Prefix mismatch between provided CpGs and bedgraphs")
228228
}
229229
}
230-
230+
231231
if (!is.null(contigs)) {
232232
bdg_dat = bdg_dat[chr %in% as.character(contigs)]
233233
}
234-
234+
235235
if (synced_coordinates) {
236236
bdg_dat = bdg_dat[strand == "-", `:=`(start, start + 1L)]
237237
}
238-
238+
239239
data.table::setkey(x = bdg_dat, "chr", "start")
240240
dup_rows = nrow(bdg_dat[duplicated(bdg_dat, by = c("chr", "start"))])
241-
if(nrow(dup_rows) > 0){
241+
if(dup_rows > 0){
242242
message(paste0("-- Removed duplicated CpGs: ", format(dup_rows, big.mark = ",")))
243243
bdg_dat = bdg_dat[!duplicated(bdg_dat, by = c("chr", "start"))]
244244
}
245245
data.table::setkey(x = genome, "chr", "start")
246-
246+
247247
missing_cpgs = genome[!bdg_dat[, list(chr, start)], on = c("chr", "start")]
248-
248+
249249
# Write missing CpGs to an op_dir
250250
if (!is.null(file_uncovered) && nrow(missing_cpgs) > 0) {
251251
fwrite(x = missing_cpgs, file = paste0(file_uncovered,
252252
gsub("\\.[[:alnum:]]+(\\.gz)?$",
253253
"", basename(bdg)), "_uncovered.bed"),
254254
sep = "\t", row.names = FALSE)
255255
}
256-
256+
257257
if (verbose) {
258258
if (nrow(missing_cpgs) > 0) {
259259
message(paste0("--CpGs missing: ", format(nrow(missing_cpgs), big.mark = ",")), " (from known reference CpGs)")
@@ -271,7 +271,7 @@ read_bdg = function(bdg, col_list = NULL, genome = NULL, verbose = TRUE,
271271
# crucial to make sure everything is in order
272272
is_identical = all.equal(target = bdg_dat[, .(chr, start)],
273273
current = genome[, .(chr, start)], ignore.row.order = FALSE)
274-
274+
275275
if (is(is_identical, "character")) {
276276
non_ref_cpgs = bdg_dat[!genome[, list(chr, start)], on = c("chr",
277277
"start")]
@@ -289,30 +289,30 @@ read_bdg = function(bdg, col_list = NULL, genome = NULL, verbose = TRUE,
289289
# Re-assign strand info from genome (since some bedgraphs have no
290290
# strand info, yet cover CpGs from both strands, i.e., MethylDackel)
291291
bdg_dat[, `:=`(strand, genome$strand)]
292-
292+
293293
if (strand_collapse) {
294294
# If strand information needs to be collapsed, bring start position of
295295
# crick strand to previous base (on watson base) and estimate new M, U
296296
# and beta values
297297
if (!all(c("M", "U") %in% names(bdg_dat))) {
298298
stop("strand_collapse works only when M and U are available!")
299299
}
300-
300+
301301
bdg_dat[, `:=`(start, ifelse(strand == "-", yes = start - 1, no = start))]
302302
bdg_dat = bdg_dat[, .(M = sum(M, na.rm = TRUE), U = sum(U, na.rm = TRUE)),
303303
.(chr, start)]
304304
bdg_dat[, `:=`(cov, M + U)]
305305
bdg_dat[, `:=`(beta, M/cov)]
306306
bdg_dat[, `:=`(strand, "+")]
307307
}
308-
308+
309309
# data.table::set(bdg_dat, which(is.nan(bdg_dat[,beta])), 'beta', NA)
310310
# If coverage is 0, convert corresponding beta as well as coverage
311311
# values to NA
312312
data.table::set(bdg_dat, which(bdg_dat[, cov] == 0), c("cov", "beta"),
313313
NA)
314314
bdg_dat = bdg_dat[, .(chr, start, beta, cov, strand)]
315-
315+
316316
bdg_genome_stat = bdg_dat[!is.na(beta), .(mean_meth = mean(beta),
317317
median_meth = median(beta),
318318
mean_cov = mean(cov),
@@ -322,7 +322,7 @@ read_bdg = function(bdg, col_list = NULL, genome = NULL, verbose = TRUE,
322322
mean_cov = mean(cov),
323323
median_cov = median(cov)), .(chr)]
324324
bdg_ncpg_stat = bdg_dat[!is.na(beta), .N, .(chr)]
325-
325+
326326
return(list(bdg = bdg_dat, genome_stat = bdg_genome_stat,
327327
chr_stat = bdg_chr_stat,
328328
ncpg = bdg_ncpg_stat))
@@ -338,13 +338,13 @@ vect_code_batch <- function(files, col_idx, batch_size, col_data = NULL,
338338
. <- NULL
339339
batches <- split(files, ceiling(seq_along(files)/batch_size))
340340
batches_samp_names <- split(rownames(col_data), ceiling(seq_along(rownames(col_data))/batch_size))
341-
341+
342342
beta_mat_final <- data.table::data.table()
343343
cov_mat_final <- data.table::data.table()
344344
genome_stat_final <- data.table::data.table()
345345
chr_stat_final <- data.table::data.table()
346346
ncpg_final <- data.table::data.table()
347-
347+
348348
for (i in seq_along(batches)) {
349349
# browser()
350350
message(paste0("-Batch: ", i, "/", length(batches)))
@@ -364,14 +364,14 @@ vect_code_batch <- function(files, col_idx, batch_size, col_data = NULL,
364364
file_uncovered = file_uncovered, zero_based = zero_based)
365365
}
366366
names(bdgs) <- samp_names
367-
367+
368368
if (i == 1) {
369369
cov_mat_final <- data.frame(lapply(bdgs, function(x) x$bdg[,
370370
.(cov)]), stringsAsFactors = FALSE)
371371
beta_mat_final <- data.frame(lapply(bdgs, function(x) x$bdg[,
372372
.(beta)]), stringsAsFactors = FALSE)
373373
colnames(cov_mat_final) <- colnames(beta_mat_final) <- samp_names
374-
374+
375375
genome_stat_final <- data.table::rbindlist(lapply(bdgs, function(x) x$genome_stat),
376376
use.names = TRUE, fill = TRUE, idcol = "Sample_Name")
377377
chr_stat_final <- data.table::rbindlist(lapply(bdgs, function(x) x$chr_stat),
@@ -386,7 +386,7 @@ vect_code_batch <- function(files, col_idx, batch_size, col_data = NULL,
386386
colnames(cov_mat) <- colnames(beta_mat) <- samp_names
387387
cov_mat_final <- cbind(cov_mat_final, cov_mat)
388388
beta_mat_final <- cbind(beta_mat_final, beta_mat)
389-
389+
390390
genome_stat_final <- rbind(genome_stat_final, data.table::rbindlist(lapply(bdgs,
391391
function(x) x$genome_stat), use.names = TRUE, fill = TRUE,
392392
idcol = "Sample_Name"))
@@ -395,7 +395,7 @@ vect_code_batch <- function(files, col_idx, batch_size, col_data = NULL,
395395
idcol = "Sample_Name"))
396396
ncpg_final <- rbind(ncpg_final, data.table::rbindlist(lapply(bdgs,
397397
function(x) x$ncpg), use.names = TRUE, fill = TRUE, idcol = "Sample_Name"))
398-
398+
399399
rm(cov_mat)
400400
rm(beta_mat)
401401
gc()
@@ -404,7 +404,7 @@ vect_code_batch <- function(files, col_idx, batch_size, col_data = NULL,
404404
gc()
405405
ncpg_final <- data.table::dcast(data = ncpg_final, chr ~ Sample_Name,
406406
value.var = "N")
407-
407+
408408
return(list(beta_matrix = data.table::setDT(beta_mat_final), cov_matrix = data.table::setDT(cov_mat_final),
409409
genome_stat = genome_stat_final, chr_stat = chr_stat_final, ncpg = ncpg_final))
410410
}
@@ -417,14 +417,14 @@ vect_code_batch <- function(files, col_idx, batch_size, col_data = NULL,
417417
non_vect_code <- function(files, col_idx, coldata, verbose = TRUE, genome = NULL,
418418
h5temp = NULL, h5 = FALSE, strand_collapse = FALSE, contigs = contigs,
419419
synced_coordinates, file_uncovered = NULL, zero_based = TRUE) {
420-
420+
421421
Sample_Name <- . <- chr <- NULL
422422
if (strand_collapse) {
423423
dimension <- as.integer(nrow(genome)/2)
424424
} else {
425425
dimension <- as.integer(nrow(genome))
426426
}
427-
427+
428428
if (h5) {
429429
if (is.null(h5temp)) {
430430
h5temp <- tempdir()
@@ -433,11 +433,11 @@ non_vect_code <- function(files, col_idx, coldata, verbose = TRUE, genome = NULL
433433
while (any(c(paste0("M_sink_", sink_counter, ".h5"), paste0("cov_sink_",
434434
sink_counter, ".h5")) %in% dir(h5temp))) {
435435
sink_counter <- sink_counter + 1
436-
436+
437437
}
438438
grid <- DelayedArray::RegularArrayGrid(refdim = c(dimension, length(files)),
439439
spacings = c(dimension, 1L))
440-
440+
441441
M_sink <- HDF5Array::HDF5RealizationSink(dim = c(dimension, length(files)),
442442
dimnames = NULL, type = "double",
443443
filepath = file.path(h5temp, paste0("M_sink_", sink_counter, ".h5")), name = "M", level = 6)
@@ -449,14 +449,14 @@ non_vect_code <- function(files, col_idx, coldata, verbose = TRUE, genome = NULL
449449
beta_mat <- data.table::data.table()
450450
cov_mat <- data.table::data.table()
451451
}
452-
452+
453453
if (h5) {
454454
for (i in seq_along(files)) {
455455
if (i == 1) {
456456
b <- read_bdg(bdg = files[i], col_list = col_idx, genome = genome,
457457
strand_collapse = strand_collapse, contigs = contigs, synced_coordinates = synced_coordinates,
458458
file_uncovered = file_uncovered, zero_based = zero_based)
459-
459+
460460
DelayedArray::write_block(block = as.matrix(b$bdg[, .(beta)]),
461461
viewport = grid[[i]], sink = M_sink)
462462
DelayedArray::write_block(block = as.matrix(b$bdg[, .(cov)]),
@@ -471,7 +471,7 @@ non_vect_code <- function(files, col_idx, coldata, verbose = TRUE, genome = NULL
471471
b <- read_bdg(bdg = files[i], col_list = col_idx, genome = genome,
472472
strand_collapse = strand_collapse, contigs = contigs, synced_coordinates = synced_coordinates,
473473
file_uncovered = file_uncovered, zero_based = zero_based)
474-
474+
475475
DelayedArray::write_block(block = as.matrix(b$bdg[, .(beta)]),
476476
viewport = grid[[i]], sink = M_sink)
477477
DelayedArray::write_block(block = as.matrix(b$bdg[, .(cov)]),
@@ -498,7 +498,7 @@ non_vect_code <- function(files, col_idx, coldata, verbose = TRUE, genome = NULL
498498
strand_collapse = strand_collapse, contigs = contigs,
499499
synced_coordinates = synced_coordinates, file_uncovered = file_uncovered,
500500
zero_based = zero_based)
501-
501+
502502
beta_mat <- b$bdg[, .(chr, start, beta)]
503503
cov_mat <- b$bdg[, .(chr, start, cov)]
504504
genome_stat_final <- b$genome_stat[, `:=`(Sample_Name,
@@ -510,7 +510,7 @@ non_vect_code <- function(files, col_idx, coldata, verbose = TRUE, genome = NULL
510510
strand_collapse = strand_collapse, contigs = contigs,
511511
synced_coordinates = synced_coordinates, file_uncovered = file_uncovered,
512512
zero_based = zero_based)
513-
513+
514514
beta_mat <- cbind(beta_mat, b$bdg[, .(beta)])
515515
cov_mat <- cbind(cov_mat, b$bdg[, .(cov)])
516516
genome_stat_final <- rbind(genome_stat_final, b$genome_stat[,
@@ -523,7 +523,7 @@ non_vect_code <- function(files, col_idx, coldata, verbose = TRUE, genome = NULL
523523
colnames(beta_mat)[ncol(beta_mat)] <- colnames(cov_mat)[ncol(cov_mat)] <- rownames(coldata)[i]
524524
}
525525

526-
526+
527527
ncpg_final <- data.table::dcast(data = ncpg_final, chr ~ Sample_Name,
528528
value.var = "N")
529529
return(list(beta_matrix = beta_mat[, -(seq_len(2))], cov_matrix = cov_mat[, -(seq_len(2))],
@@ -560,7 +560,7 @@ cast_ranges <- function(regions, set.key = TRUE) {
560560
} else {
561561
stop("Invalid input class for regions. Must be a data.table or GRanges object")
562562
}
563-
563+
564564
target_regions
565565
}
566566

@@ -569,7 +569,7 @@ cast_ranges <- function(regions, set.key = TRUE) {
569569
giveme_this <- function(mat, stat = "mean", na_rm = TRUE, ish5 = FALSE) {
570570
stat <- match.arg(arg = stat, choices = c("mean", "median", "min",
571571
"max", "sum"))
572-
572+
573573
if (ish5) {
574574
if (stat == "mean") {
575575
res <- DelayedMatrixStats::colMeans2(mat, na.rm = na_rm)
@@ -595,25 +595,25 @@ giveme_this <- function(mat, stat = "mean", na_rm = TRUE, ish5 = FALSE) {
595595
res <- matrixStats::colSums2(mat, na.rm = na_rm)
596596
}
597597
}
598-
598+
599599
res
600600
}
601601

602602

603603
#--------------------------------------------------------------------------------------------------------------------------
604604
# Tiny script to get axis and limits
605605
get_y_lims <- function(vec) {
606-
606+
607607
y_lims <- range(vec)
608608
y_at <- pretty(y_lims)
609-
609+
610610
if (y_at[1] > min(vec, na.rm = TRUE)) {
611611
y_at[1] <- min(vec, na.rm = TRUE)
612612
}
613613
if (y_at[length(y_at)] < max(vec, na.rm = TRUE)) {
614614
y_at[length(y_at)] <- max(vec, na.rm = TRUE)
615615
}
616616
y_lims <- range(y_at, na.rm = TRUE)
617-
617+
618618
list(y_lims = y_lims, y_at = y_at)
619619
}

R/methrix_object.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ setMethod(f = "show", signature = "methrix", definition = function(object) {
1818
cat(paste0(" n_CpGs: ", format(nrow(object), big.mark = ","), "\n"))
1919
cat(paste0("n_samples: ", ncol(object), "\n"))
2020
cat(paste0(" is_h5: ", is_h5(object), "\n"))
21-
cat(paste0("Reference: ", metadata(object)$genome, "\n"))
21+
cat(paste0("Reference: ", S4Vectors::metadata(object)$genome, "\n"))
2222
})
2323

2424
# Create methrix obj

0 commit comments

Comments
 (0)