Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Track input #62

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Generated by roxygen2: do not edit by hand

export(add_RNA_assay)
export(alive_identification)
export(annotation_consensus)
export(annotation_label_transfer)
Expand Down Expand Up @@ -95,7 +94,6 @@ importFrom(dplyr,select)
importFrom(dplyr,with_groups)
importFrom(edgeR,estimateDisp)
importFrom(future,nbrOfWorkers)
importFrom(future,tweak)
importFrom(glue,glue)
importFrom(grid,grid.grab)
importFrom(gridGraphics,grid.echo)
Expand Down
160 changes: 69 additions & 91 deletions R/execute_pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
#' @importFrom glue glue
#' @importFrom targets tar_script
#' @import targets
#' @importFrom future tweak
#' @export
run_targets_pipeline <- function(
input_data,
Expand All @@ -34,37 +33,6 @@ run_targets_pipeline <- function(
cell_type_annotation_column = "Cell_type_in_each_tissue"
){

# Fix GCHECKS
read_file <- NULL
reference_file <- NULL
tissue_file <- NULL
filtered_file <- NULL
sample_column_file <- NULL
cell_type_annotation_column_file <- NULL
reference_label_coarse <- NULL
reference_label_fine <- NULL
input_read <- NULL
unique_tissues <- NULL
reference_read <- NULL
empty_droplets_tbl <- NULL
cell_cycle_score_tbl <- NULL
annotation_label_transfer_tbl <- NULL
alive_identification_tbl <- NULL
doublet_identification_tbl <- NULL
non_batch_variation_removal_S <- NULL
preprocessing_output_S <- NULL
create_pseudobulk_sample <- NULL
sampleName <- NULL
cellAnno <- NULL
pseudobulk_merge_all_samples <- NULL
calc_UMAP_dbl_report <- NULL
variable_gene_list <- NULL
tar_render <- NULL
empty_droplets_report <- NULL
doublet_identification_report <- NULL
Technical_variation_report <- NULL
pseudobulk_processing_report <- NULL

sample_column = enquo(sample_column)
# cell_type_annotation_column = enquo(cell_type_annotation_column)

Expand Down Expand Up @@ -111,8 +79,10 @@ run_targets_pipeline <- function(
"scDblFinder",
"ggupset",
"tidySummarizedExperiment",
"broom",
"tarchetypes",
"SeuratObject",
"SingleCellExperiment",
"SingleR",
"celldex",
"tidySingleCellExperiment",
Expand Down Expand Up @@ -149,7 +119,7 @@ run_targets_pipeline <- function(
# )
# )
# plan(slurm)

# small_slurm =
# tar_resources(
# future = tar_resources_future(
Expand Down Expand Up @@ -181,7 +151,7 @@ run_targets_pipeline <- function(
# )

target_list = list(
tar_target(file, "input_file.rds", format = "rds"),
# tar_target(file, "input_file.rds", format = "rds"),
tar_target(read_file, readRDS("input_file.rds")),
#tar_target(reference_file, "input_reference.rds", format = "rds"),
tar_target(reference_file, readRDS("input_reference.rds")),
Expand Down Expand Up @@ -209,13 +179,19 @@ run_targets_pipeline <- function(
tar_target(reference_label_coarse, reference_label_coarse_id(tissue), deployment = "main"),
tar_target(reference_label_fine, reference_label_fine_id(tissue), deployment = "main"),
# Reading input files
tar_target(input_read, readRDS(read_file),
tar_target(input_read, read_file,
pattern = map(read_file),
iteration = "list", deployment = "main"),
iteration = "list"),
tar_target(assay, get_assay(readRDS(input_read)),
pattern = map(input_read),
iteration = "list"),
tar_target(meta_data, extract_metadata(readRDS(input_read)),
pattern = map(input_read),
iteration = "list"),
tar_target(unique_tissues,
get_unique_tissues(input_read, sample_column |> quo_name()),
get_unique_tissues(readRDS(input_read), sample_column |> quo_name()),
pattern = map(input_read),
iteration = "list", deployment = "main"),
iteration = "list"),
# tar_target(
# tissue_subsets,
# input_read, split.by = "Tissue"),
Expand All @@ -226,37 +202,38 @@ run_targets_pipeline <- function(

# Identifying empty droplets
tar_target(empty_droplets_tbl,
empty_droplet_id(input_read, filter_empty_droplets),
empty_droplet_id(readRDS(input_read), filter_empty_droplets),
pattern = map(input_read),
iteration = "list"),

# Cell cycle scoring
tar_target(cell_cycle_score_tbl, cell_cycle_scoring(input_read,
tar_target(cell_cycle_score_tbl, cell_cycle_scoring(readRDS(input_read),
empty_droplets_tbl),
pattern = map(input_read,
empty_droplets_tbl),
iteration = "list"),

# Annotation label transfer
tar_target(annotation_label_transfer_tbl,
annotation_label_transfer(input_read,
annotation_label_transfer(readRDS(input_read),
empty_droplets_tbl,
reference_read),
pattern = map(input_read,
empty_droplets_tbl),
iteration = "list"),

# Alive identification
tar_target(alive_identification_tbl, alive_identification(input_read,
tar_target(alive_identification_tbl, alive_identification(readRDS(input_read),
empty_droplets_tbl,
annotation_label_transfer_tbl),
annotation_label_transfer_tbl,
tissue_name = unique_tissues),
pattern = map(input_read,
empty_droplets_tbl,
annotation_label_transfer_tbl),
iteration = "list"),

# Doublet identification
tar_target(doublet_identification_tbl, doublet_identification(input_read,
tar_target(doublet_identification_tbl, doublet_identification(readRDS(input_read),
empty_droplets_tbl,
alive_identification_tbl,
annotation_label_transfer_tbl,
Expand All @@ -268,7 +245,7 @@ run_targets_pipeline <- function(
iteration = "list"),

# Non-batch variation removal
tar_target(non_batch_variation_removal_S, non_batch_variation_removal(input_read,
tar_target(non_batch_variation_removal_S, non_batch_variation_removal(readRDS(input_read),
empty_droplets_tbl,
alive_identification_tbl,
cell_cycle_score_tbl),
Expand Down Expand Up @@ -305,52 +282,55 @@ run_targets_pipeline <- function(
x = c(sampleName)),
iteration = "list"),

tar_target(calc_UMAP_dbl_report, calc_UMAP(input_read),
tar_target(calc_UMAP_dbl_report, calc_UMAP(readRDS(input_read)),
pattern = map(input_read),
iteration = "list"),
tar_target(variable_gene_list, find_variable_genes(input_read,
tar_target(variable_gene_list, find_variable_genes(readRDS(input_read),
empty_droplets_tbl),
pattern = map(input_read, empty_droplets_tbl),
iteration = "list"),

tar_render(
name = empty_droplets_report, # The name of the target
path = paste0(system.file(package = "HPCell"), "/rmd/Empty_droplet_report.Rmd"),
params = list(x1 = tar_read(input_read, store = store),
x2 = tar_read(empty_droplets_tbl, store = store),
x3 = tar_read(annotation_label_transfer_tbl, store = store),
x4 = tar_read(unique_tissues, store = store),
x5 = sample_column |> quo_name())
),
tar_render(
name = doublet_identification_report,
path = paste0(system.file(package = "HPCell"), "/rmd/Doublet_identification_report.Rmd"),
params = list(x1 = input_read,
x2 = calc_UMAP_dbl_report,
x3 = doublet_identification_tbl,
x4 = annotation_label_transfer_tbl,
x5 = sample_column |> quo_name(),
x6 = cell_type_annotation_column |> quo_name())
),
tar_render(
name = Technical_variation_report,
path = paste0(system.file(package = "HPCell"), "/rmd/Technical_variation_report.Rmd"),
params = list(x1= input_read,
x2= empty_droplets_tbl,
x3 = variable_gene_list,
x4 = calc_UMAP_dbl_report,
x5 = sample_column |> quo_name())
),
tar_render(
name = pseudobulk_processing_report,
path = paste0(system.file(package = "HPCell"), "/rmd/pseudobulk_analysis_report.Rmd"),
params = list(x1 = pseudobulk_merge_all_samples,
x2 = sample_column |> quo_name(),
x3 = cell_type_annotation_column |> quo_name())
)
))
iteration = "list")
#
# tar_render(
# name = empty_droplets_report, # The name of the target
# path = paste0(system.file(package = "HPCell"), "/rmd/Empty_droplet_report.Rmd"),
# params = list(x1 = meta_data,
# x2 = empty_droplets_tbl,
# x3 = annotation_label_transfer_tbl,
# x4 = unique_tissues,
# x5 = sample_column |> quo_name(),
# x6 = assay,
# x7 = alive_id_tbl)
# )
# tar_render(
# name = doublet_identification_report,
# path = paste0(system.file(package = "HPCell"), "/rmd/Doublet_identification_report.Rmd"),
# params = list(x1 = input_read,
# x2 = calc_UMAP_dbl_report,
# x3 = doublet_identification_tbl,
# x4 = annotation_label_transfer_tbl,
# x5 = sample_column |> quo_name(),
# x6 = cell_type_annotation_column |> quo_name())
# ),
# tar_render(
# name = Technical_variation_report,
# path = paste0(system.file(package = "HPCell"), "/rmd/Technical_variation_report.Rmd"),
# params = list(x1= lapply( input_read, function(file) {
# return(readRDS(file))}),
# x2= empty_droplets_tbl,
# x3 = variable_gene_list,
# x4 = calc_UMAP_dbl_report,
# x5 = sample_column |> quo_name())
# ),
# tar_render(
# name = pseudobulk_processing_report,
# path = paste0(system.file(package = "HPCell"), "/rmd/pseudobulk_analysis_report.Rmd"),
# params = list(x1 = pseudobulk_merge_all_samples,
# x2 = sample_column |> quo_name(),
# x3 = cell_type_annotation_column |> quo_name())
# )
))
}, script = glue("{store}.R"), ask = FALSE)

#Running targets
# input_files<- c("CB150T04X__batch14.rds","CB291T01X__batch8.rds")
# run_targets <- function(input_files){
Expand All @@ -362,7 +342,7 @@ run_targets_pipeline <- function(
# run_targets(input_files)
tar_make(
script = glue("{store}.R"),
store = store,
store = store,
callr_function = NULL
)
# tar_make_future(
Expand All @@ -373,10 +353,8 @@ run_targets_pipeline <- function(
# )

message(glue("HPCell says: you can read your output executing tar_read(preprocessing_output_S, store = \"{store}\") "))
tar_meta_download(store = store)
metadata<- tar_meta(names = everything(), store = store)
return(metadata)
#tar_read(preprocessing_output_S, store = store)

tar_read(preprocessing_output_S, store = store)

}

Expand Down
Loading