-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add ADT, TCR and BCR for Integrated or Grouped analysis
- Loading branch information
Showing
16 changed files
with
1,753 additions
and
141 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
""" | ||
########################################################################## | ||
This rule adds ADT information to the gene expression analysis of grouped single-cell RNA-seq data. | ||
########################################################################## | ||
""" | ||
|
||
import re

# The wildcard may only match one of the configured output prefixes. Each
# prefix is escaped so that regex metacharacters occurring in a path
# (".", "+", "(", ...) are matched literally instead of being interpreted.
wildcard_constraints:
    grp_add_adt_output = "|".join(re.escape(prefix) for prefix in GRP_ADD_ADT_OUTPUT)
|
||
""" | ||
This function allows to determine the input .rda ge file and kallisto adt folder. | ||
""" | ||
def grp_add_adt_input(wildcards):
    """Return the rule inputs: the GE .rda file followed by the ADT folders.

    Args:
        wildcards: Snakemake wildcards; ``grp_add_adt_output`` selects the
            entry of ``dic_GRP_ADD_ADT_INFO`` to read.

    Returns:
        A list whose first element is the 'GRP_ADD_ADT_INPUT_RDA' value and
        whose remaining elements are the comma-separated
        'GRP_ADD_ADT_INPUT_DIR_ADT' entries, duplicates removed, original
        order kept.
    """
    # No debug write to stderr here: input functions should stay silent,
    # like the integrated counterpart of this function.
    info = dic_GRP_ADD_ADT_INFO[wildcards.grp_add_adt_output]
    ge_rda_file = info['GRP_ADD_ADT_INPUT_RDA']
    adt_folders = info['GRP_ADD_ADT_INPUT_DIR_ADT'].split(",")
    # dict.fromkeys() drops repeated folders while preserving their order.
    return [ge_rda_file, *dict.fromkeys(adt_folders)]
|
||
""" | ||
This function allows to determine the singularity binding parameters. | ||
""" | ||
def grp_add_adt_params_sing(wildcards):
    """Build the singularity ``-B`` bind options for this output.

    Every host folder is mounted in the container at the same path prefixed
    with ``/WORKDIR/``. The folders are, in order: the pipeline folder, the
    folder holding the input .rda file (per the original author's note, also
    the output folder) and the parent folder of each ADT input folder.

    Args:
        wildcards: Snakemake wildcards; ``grp_add_adt_output`` selects the
            entry of ``dic_GRP_ADD_ADT_INFO`` to read.

    Returns:
        A string of `` -B host:container`` options, one per distinct host
        folder.
    """
    info = dic_GRP_ADD_ADT_INFO[wildcards.grp_add_adt_output]
    host_folders = [PIPELINE_FOLDER, os.path.dirname(info['GRP_ADD_ADT_INPUT_RDA'])]
    host_folders.extend(
        os.path.dirname(adt_folder)
        for adt_folder in info['GRP_ADD_ADT_INPUT_DIR_ADT'].split(",")
    )
    # De-duplicate AFTER taking the parent folder: several inputs may share a
    # parent (or live next to the .rda file), and each bind is needed once.
    return "".join(
        " -B " + folder + ":" + os.path.normpath("/WORKDIR/" + folder)
        for folder in dict.fromkeys(host_folders)
    )
|
||
""" | ||
This function allows to determine the input alignment folder for params section. | ||
""" | ||
def grp_add_adt_params_input_folder(wildcards):
    """Return the ADT input folders as container paths, comma-separated.

    Each distinct entry of 'GRP_ADD_ADT_INPUT_DIR_ADT' is prefixed with
    ``/WORKDIR/`` and normalised with ``os.path.normpath``; order is kept.
    """
    raw_folders = dic_GRP_ADD_ADT_INFO[wildcards.grp_add_adt_output]['GRP_ADD_ADT_INPUT_DIR_ADT']
    container_folders = []
    for adt_folder in dict.fromkeys(raw_folders.split(",")):
        container_folders.append(os.path.normpath("/WORKDIR/" + adt_folder + "/"))
    return ",".join(container_folders)
|
||
""" | ||
This function allows to determine the output folder for params (os.path.dirname() not allowed in params slot). | ||
""" | ||
def grp_add_adt_params_output_folder(wildcards):
    """Return the container-side output folder, with a trailing slash.

    The folder is the parent directory of the ``grp_add_adt_output`` wildcard,
    re-rooted under ``/WORKDIR/``.
    """
    host_folder = os.path.dirname(wildcards.grp_add_adt_output)
    container_folder = os.path.normpath("/WORKDIR/" + host_folder)
    return container_folder + "/"
|
||
""" | ||
This function allows to determine the sample.name.adt for params. | ||
""" | ||
def grp_add_adt_params_sample_name_adt(wildcards):
    """Return the 'GRP_ADD_ADT_SAMPLE_NAME_ADT' value configured for this output."""
    info = dic_GRP_ADD_ADT_INFO[wildcards.grp_add_adt_output]
    return info['GRP_ADD_ADT_SAMPLE_NAME_ADT']
|
||
|
||
""" | ||
This rule launches the R script to add adt information to expression gene analysis. | ||
""" | ||
rule grp_add_adt_ge:
    # Runs Int_Grp_pipeline_ADT.R inside singularity for one grouped output.
    input:
        # grp_add_adt_input() puts the .rda file first, then the ADT folders.
        grp_add_adt_file = grp_add_adt_input
    output:
        grp_add_adt_rda_file = "{grp_add_adt_output}_ADT.rda"
    params:
        # All paths below are container-side paths rooted at /WORKDIR.
        sing_bind = grp_add_adt_params_sing,
        pipeline_folder = os.path.normpath("/WORKDIR/" + PIPELINE_FOLDER),
        input_rda = lambda wildcards, input: os.path.normpath("/WORKDIR/" + input[0]),
        kallisto_folder = grp_add_adt_params_input_folder,
        output_folder = grp_add_adt_params_output_folder,
        sample_name_adt = grp_add_adt_params_sample_name_adt
    threads: 1
    resources:
        # Both values grow with the attempt number and are capped
        # (20480 for mem_mb, 200 for time_min).
        mem_mb = lambda wildcards, attempt: min(5120 + attempt * 3072, 20480),
        time_min = lambda wildcards, attempt: min(attempt * 120, 200)
    shell:
        """
        singularity exec --no-home {params.sing_bind} \
        {SINGULARITY_ENV} \
        Rscript {params.pipeline_folder}/scripts/Int_Grp_pipeline_ADT.R \
        --samples.name.adt {params.sample_name_adt} \
        --input.rda.ge {params.input_rda} \
        --output.dir {params.output_folder} \
        --input.dirs.adt {params.kallisto_folder} \
        --author.name {GRP_ADD_ADT_AUTHOR_NAME} \
        --author.mail {GRP_ADD_ADT_AUTHOR_MAIL} \
        --nthreads {threads} \
        --pipeline.path {params.pipeline_folder} \
        --gene.names {GRP_ADD_ADT_GENE_NAMES} \
        --ADT.min.cutoff {GRP_ADD_ADT_MIN_CUTOFF} \
        --ADT.max.cutoff {GRP_ADD_ADT_MAX_CUTOFF}
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
""" | ||
########################################################################## | ||
This rule adds BCR information to the gene expression analysis of grouped single-cell RNA-seq data. | ||
########################################################################## | ||
""" | ||
import re

# The wildcard may only match one of the configured output prefixes. Each
# prefix is escaped so that regex metacharacters occurring in a path
# (".", "+", "(", ...) are matched literally instead of being interpreted.
wildcard_constraints:
    grp_add_bcr_output = "|".join(re.escape(prefix) for prefix in GRP_ADD_BCR_OUTPUT)
|
||
""" | ||
This function allows to determine the input .rda file and csv file from cellranger vdj. | ||
""" | ||
def grp_add_bcr_input(wildcards):
    """Return the rule inputs: the .rda file followed by the BCR csv files.

    The csv files come from the comma-separated 'GRP_ADD_BCR_INPUT_CSV_BCR'
    value; duplicates are dropped and the original order is kept.
    """
    info = dic_GRP_ADD_BCR_INFO[wildcards.grp_add_bcr_output]
    rda_file = info['GRP_ADD_BCR_INPUT_RDA']
    csv_files = info['GRP_ADD_BCR_INPUT_CSV_BCR'].split(",")
    # dict.fromkeys() removes repeated paths without reordering them.
    return [rda_file, *dict.fromkeys(csv_files)]
|
||
""" | ||
This function allows to determine the singularity binding parameters. | ||
""" | ||
def grp_add_bcr_params_sing(wildcards):
    """Build the singularity ``-B`` bind options for this output.

    Every host folder is mounted in the container at the same path prefixed
    with ``/WORKDIR/``. The folders are, in order: the pipeline folder, the
    folder holding the input .rda file (per the original author's note, also
    the output folder) and the folder of each BCR csv file.

    Args:
        wildcards: Snakemake wildcards; ``grp_add_bcr_output`` selects the
            entry of ``dic_GRP_ADD_BCR_INFO`` to read.

    Returns:
        A string of `` -B host:container`` options, one per distinct host
        folder.
    """
    info = dic_GRP_ADD_BCR_INFO[wildcards.grp_add_bcr_output]
    host_folders = [PIPELINE_FOLDER, os.path.dirname(info['GRP_ADD_BCR_INPUT_RDA'])]
    host_folders.extend(
        os.path.dirname(csv_file)
        for csv_file in info['GRP_ADD_BCR_INPUT_CSV_BCR'].split(",")
    )
    # De-duplicate AFTER taking the folder: several csv files may share a
    # folder (or live next to the .rda file), and each bind is needed once.
    return "".join(
        " -B " + folder + ":" + os.path.normpath("/WORKDIR/" + folder)
        for folder in dict.fromkeys(host_folders)
    )
|
||
""" | ||
This function allows to determine the bcr files folders for params. | ||
""" | ||
def grp_add_bcr_params_bcr_files(wildcards):
    """Return the BCR csv files as container paths, comma-separated.

    Each distinct entry of 'GRP_ADD_BCR_INPUT_CSV_BCR' is prefixed with
    ``/WORKDIR/`` and normalised with ``os.path.normpath``; order is kept.
    """
    raw_files = dic_GRP_ADD_BCR_INFO[wildcards.grp_add_bcr_output]['GRP_ADD_BCR_INPUT_CSV_BCR']
    container_files = []
    for csv_file in dict.fromkeys(raw_files.split(",")):
        container_files.append(os.path.normpath("/WORKDIR/" + csv_file))
    return ",".join(container_files)
|
||
""" | ||
This function allows to determine the output folder for params (os.path.dirname() not allowed in params slot). | ||
""" | ||
def grp_add_bcr_params_output_folder(wildcards):
    """Return the container-side output folder, with a trailing slash.

    The folder is the parent directory of the ``grp_add_bcr_output`` wildcard,
    re-rooted under ``/WORKDIR/``.
    """
    host_folder = os.path.dirname(wildcards.grp_add_bcr_output)
    container_folder = os.path.normpath("/WORKDIR/" + host_folder)
    return container_folder + "/"
|
||
""" | ||
This rule launches the R script to add BCR information to the gene expression analysis. | ||
""" | ||
rule grp_add_bcr_ge:
    # Runs Int_Grp_pipeline_BCR.R inside singularity for one grouped output.
    input:
        # grp_add_bcr_input() puts the .rda file first, then the csv files.
        grp_add_bcr_file = grp_add_bcr_input
    output:
        grp_add_bcr_rda_file = "{grp_add_bcr_output}_BCR.rda"
    params:
        # All paths below are container-side paths rooted at /WORKDIR.
        sing_bind = grp_add_bcr_params_sing,
        pipeline_folder = os.path.normpath("/WORKDIR/" + PIPELINE_FOLDER),
        input_rda = lambda wildcards, input: os.path.normpath("/WORKDIR/" + input[0]),
        input_csv = grp_add_bcr_params_bcr_files,
        output_folder = grp_add_bcr_params_output_folder
    threads: 1
    resources:
        # Both values grow with the attempt number and are capped
        # (20480 for mem_mb, 200 for time_min).
        mem_mb = lambda wildcards, attempt: min(5120 + attempt * 3072, 20480),
        time_min = lambda wildcards, attempt: min(attempt * 120, 200)
    shell:
        """
        singularity exec --no-home {params.sing_bind} \
        {SINGULARITY_ENV_TCR_BCR} \
        Rscript {params.pipeline_folder}/scripts/Int_Grp_pipeline_BCR.R \
        --input.rda {params.input_rda} \
        --output.dir {params.output_folder} \
        --vdj.input.files.bcr {params.input_csv} \
        --author.name {GRP_ADD_BCR_AUTHOR_NAME} \
        --author.mail {GRP_ADD_BCR_AUTHOR_MAIL} \
        --pipeline.path {params.pipeline_folder}
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
""" | ||
########################################################################## | ||
This rule adds TCR information to the gene expression analysis of grouped single-cell RNA-seq data. | ||
########################################################################## | ||
""" | ||
import re

# The wildcard may only match one of the configured output prefixes. Each
# prefix is escaped so that regex metacharacters occurring in a path
# (".", "+", "(", ...) are matched literally instead of being interpreted.
wildcard_constraints:
    grp_add_tcr_output = "|".join(re.escape(prefix) for prefix in GRP_ADD_TCR_OUTPUT)
|
||
""" | ||
This function allows to determine the input .rda file and csv file from cellranger vdj. | ||
""" | ||
def grp_add_tcr_input(wildcards):
    """Return the rule inputs: the .rda file followed by the TCR csv files.

    The csv files come from the comma-separated 'GRP_ADD_TCR_INPUT_CSV_TCR'
    value; duplicates are dropped and the original order is kept.
    """
    info = dic_GRP_ADD_TCR_INFO[wildcards.grp_add_tcr_output]
    rda_file = info['GRP_ADD_TCR_INPUT_RDA']
    csv_files = info['GRP_ADD_TCR_INPUT_CSV_TCR'].split(",")
    # dict.fromkeys() removes repeated paths without reordering them.
    return [rda_file, *dict.fromkeys(csv_files)]
|
||
""" | ||
This function allows to determine the singularity binding parameters. | ||
""" | ||
def grp_add_tcr_params_sing(wildcards):
    """Build the singularity ``-B`` bind options for this output.

    Every host folder is mounted in the container at the same path prefixed
    with ``/WORKDIR/``. The folders are, in order: the pipeline folder, the
    folder holding the input .rda file (per the original author's note, also
    the output folder) and the folder of each TCR csv file.

    Args:
        wildcards: Snakemake wildcards; ``grp_add_tcr_output`` selects the
            entry of ``dic_GRP_ADD_TCR_INFO`` to read.

    Returns:
        A string of `` -B host:container`` options, one per distinct host
        folder.
    """
    info = dic_GRP_ADD_TCR_INFO[wildcards.grp_add_tcr_output]
    host_folders = [PIPELINE_FOLDER, os.path.dirname(info['GRP_ADD_TCR_INPUT_RDA'])]
    host_folders.extend(
        os.path.dirname(csv_file)
        for csv_file in info['GRP_ADD_TCR_INPUT_CSV_TCR'].split(",")
    )
    # De-duplicate AFTER taking the folder: several csv files may share a
    # folder (or live next to the .rda file), and each bind is needed once.
    return "".join(
        " -B " + folder + ":" + os.path.normpath("/WORKDIR/" + folder)
        for folder in dict.fromkeys(host_folders)
    )
|
||
""" | ||
This function allows to determine the tcr files folders for params. | ||
""" | ||
def grp_add_tcr_params_tcr_files(wildcards):
    """Return the TCR csv files as container paths, comma-separated.

    Each distinct entry of 'GRP_ADD_TCR_INPUT_CSV_TCR' is prefixed with
    ``/WORKDIR/`` and normalised with ``os.path.normpath``; order is kept.
    """
    raw_files = dic_GRP_ADD_TCR_INFO[wildcards.grp_add_tcr_output]['GRP_ADD_TCR_INPUT_CSV_TCR']
    container_files = []
    for csv_file in dict.fromkeys(raw_files.split(",")):
        container_files.append(os.path.normpath("/WORKDIR/" + csv_file))
    return ",".join(container_files)
|
||
""" | ||
This function allows to determine the output folder for params (os.path.dirname() not allowed in params slot). | ||
""" | ||
def grp_add_tcr_params_output_folder(wildcards):
    """Return the container-side output folder, with a trailing slash.

    The folder is the parent directory of the ``grp_add_tcr_output`` wildcard,
    re-rooted under ``/WORKDIR/``.
    """
    host_folder = os.path.dirname(wildcards.grp_add_tcr_output)
    container_folder = os.path.normpath("/WORKDIR/" + host_folder)
    return container_folder + "/"
|
||
""" | ||
This rule launches the R script to add TCR information to the gene expression analysis. | ||
""" | ||
rule grp_add_tcr_ge:
    # Runs Int_Grp_pipeline_TCR.R inside singularity for one grouped output.
    input:
        # grp_add_tcr_input() puts the .rda file first, then the csv files.
        grp_add_tcr_file = grp_add_tcr_input
    output:
        grp_add_tcr_rda_file = "{grp_add_tcr_output}_TCR.rda"
    params:
        # All paths below are container-side paths rooted at /WORKDIR.
        sing_bind = grp_add_tcr_params_sing,
        pipeline_folder = os.path.normpath("/WORKDIR/" + PIPELINE_FOLDER),
        input_rda = lambda wildcards, input: os.path.normpath("/WORKDIR/" + input[0]),
        input_csv = grp_add_tcr_params_tcr_files,
        output_folder = grp_add_tcr_params_output_folder
    threads: 1
    resources:
        # Both values grow with the attempt number and are capped
        # (20480 for mem_mb, 200 for time_min).
        mem_mb = lambda wildcards, attempt: min(5120 + attempt * 3072, 20480),
        time_min = lambda wildcards, attempt: min(attempt * 120, 200)
    shell:
        """
        singularity exec --no-home {params.sing_bind} \
        {SINGULARITY_ENV_TCR_BCR} \
        Rscript {params.pipeline_folder}/scripts/Int_Grp_pipeline_TCR.R \
        --input.rda {params.input_rda} \
        --output.dir {params.output_folder} \
        --vdj.input.files.tcr {params.input_csv} \
        --author.name {GRP_ADD_TCR_AUTHOR_NAME} \
        --author.mail {GRP_ADD_TCR_AUTHOR_MAIL} \
        --pipeline.path {params.pipeline_folder}
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
""" | ||
########################################################################## | ||
This rule adds ADT information to the gene expression analysis of integrated single-cell RNA-seq data. | ||
########################################################################## | ||
""" | ||
import re

# The wildcard may only match one of the configured output prefixes. Each
# prefix is escaped so that regex metacharacters occurring in a path
# (".", "+", "(", ...) are matched literally instead of being interpreted.
wildcard_constraints:
    int_add_adt_output = "|".join(re.escape(prefix) for prefix in INT_ADD_ADT_OUTPUT)
|
||
""" | ||
This function allows to determine the input .rda ge file and kallisto adt folder. | ||
""" | ||
def int_add_adt_input(wildcards):
    """Return the rule inputs: the GE .rda file followed by the ADT folders.

    The folders come from the comma-separated 'INT_ADD_ADT_INPUT_DIR_ADT'
    value; duplicates are dropped and the original order is kept.
    """
    info = dic_INT_ADD_ADT_INFO[wildcards.int_add_adt_output]
    ge_rda_file = info['INT_ADD_ADT_INPUT_RDA']
    adt_folders = info['INT_ADD_ADT_INPUT_DIR_ADT'].split(",")
    # dict.fromkeys() removes repeated folders without reordering them.
    return [ge_rda_file, *dict.fromkeys(adt_folders)]
|
||
""" | ||
This function allows to determine the singularity binding parameters. | ||
""" | ||
def int_add_adt_params_sing(wildcards):
    """Build the singularity ``-B`` bind options for this output.

    Every host folder is mounted in the container at the same path prefixed
    with ``/WORKDIR/``. The folders are, in order: the pipeline folder, the
    folder holding the input .rda file (per the original author's note, also
    the output folder) and the parent folder of each ADT input folder.

    Args:
        wildcards: Snakemake wildcards; ``int_add_adt_output`` selects the
            entry of ``dic_INT_ADD_ADT_INFO`` to read.

    Returns:
        A string of `` -B host:container`` options, one per distinct host
        folder.
    """
    info = dic_INT_ADD_ADT_INFO[wildcards.int_add_adt_output]
    host_folders = [PIPELINE_FOLDER, os.path.dirname(info['INT_ADD_ADT_INPUT_RDA'])]
    host_folders.extend(
        os.path.dirname(adt_folder)
        for adt_folder in info['INT_ADD_ADT_INPUT_DIR_ADT'].split(",")
    )
    # De-duplicate AFTER taking the parent folder: several inputs may share a
    # parent (or live next to the .rda file), and each bind is needed once.
    return "".join(
        " -B " + folder + ":" + os.path.normpath("/WORKDIR/" + folder)
        for folder in dict.fromkeys(host_folders)
    )
|
||
""" | ||
This function allows to determine the input alignment folder for params section. | ||
""" | ||
def int_add_adt_params_input_folder(wildcards):
    """Return the ADT input folders as container paths, comma-separated.

    Each distinct entry of 'INT_ADD_ADT_INPUT_DIR_ADT' is prefixed with
    ``/WORKDIR/`` and normalised with ``os.path.normpath``; order is kept.
    """
    raw_folders = dic_INT_ADD_ADT_INFO[wildcards.int_add_adt_output]['INT_ADD_ADT_INPUT_DIR_ADT']
    container_folders = []
    for adt_folder in dict.fromkeys(raw_folders.split(",")):
        container_folders.append(os.path.normpath("/WORKDIR/" + adt_folder + "/"))
    return ",".join(container_folders)
|
||
""" | ||
This function allows to determine the output folder for params (os.path.dirname() not allowed in params slot). | ||
""" | ||
def int_add_adt_params_output_folder(wildcards):
    """Return the container-side output folder, with a trailing slash.

    The folder is the parent directory of the ``int_add_adt_output`` wildcard,
    re-rooted under ``/WORKDIR/``.
    """
    host_folder = os.path.dirname(wildcards.int_add_adt_output)
    container_folder = os.path.normpath("/WORKDIR/" + host_folder)
    return container_folder + "/"
|
||
""" | ||
This function allows to determine the sample.name.adt for params. | ||
""" | ||
def int_add_adt_params_sample_name_adt(wildcards):
    """Return the 'INT_ADD_ADT_SAMPLE_NAME_ADT' value configured for this output."""
    info = dic_INT_ADD_ADT_INFO[wildcards.int_add_adt_output]
    return info['INT_ADD_ADT_SAMPLE_NAME_ADT']
|
||
|
||
""" | ||
This rule launches the R script to add adt information to expression gene analysis. | ||
""" | ||
rule int_add_adt_ge:
    # Runs Int_Grp_pipeline_ADT.R inside singularity for one integrated output.
    input:
        # int_add_adt_input() puts the .rda file first, then the ADT folders.
        int_add_adt_file = int_add_adt_input
    output:
        int_add_adt_rda_file = "{int_add_adt_output}_ADT.rda"
    params:
        # All paths below are container-side paths rooted at /WORKDIR.
        sing_bind = int_add_adt_params_sing,
        pipeline_folder = os.path.normpath("/WORKDIR/" + PIPELINE_FOLDER),
        input_rda = lambda wildcards, input: os.path.normpath("/WORKDIR/" + input[0]),
        kallisto_folder = int_add_adt_params_input_folder,
        output_folder = int_add_adt_params_output_folder,
        sample_name_adt = int_add_adt_params_sample_name_adt
    threads: 1
    resources:
        # Both values grow with the attempt number and are capped
        # (20480 for mem_mb, 200 for time_min).
        mem_mb = lambda wildcards, attempt: min(5120 + attempt * 3072, 20480),
        time_min = lambda wildcards, attempt: min(attempt * 120, 200)
    shell:
        """
        singularity exec --no-home {params.sing_bind} \
        {SINGULARITY_ENV} \
        Rscript {params.pipeline_folder}/scripts/Int_Grp_pipeline_ADT.R \
        --samples.name.adt {params.sample_name_adt} \
        --input.rda.ge {params.input_rda} \
        --output.dir {params.output_folder} \
        --input.dirs.adt {params.kallisto_folder} \
        --author.name {INT_ADD_ADT_AUTHOR_NAME} \
        --author.mail {INT_ADD_ADT_AUTHOR_MAIL} \
        --nthreads {threads} \
        --pipeline.path {params.pipeline_folder} \
        --gene.names {INT_ADD_ADT_GENE_NAMES} \
        --ADT.min.cutoff {INT_ADD_ADT_MIN_CUTOFF} \
        --ADT.max.cutoff {INT_ADD_ADT_MAX_CUTOFF}
        """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
""" | ||
########################################################################## | ||
This rule adds BCR information to the gene expression analysis of integrated single-cell RNA-seq data. | ||
########################################################################## | ||
""" | ||
import re

# The wildcard may only match one of the configured output prefixes. Each
# prefix is escaped so that regex metacharacters occurring in a path
# (".", "+", "(", ...) are matched literally instead of being interpreted.
wildcard_constraints:
    int_add_bcr_output = "|".join(re.escape(prefix) for prefix in INT_ADD_BCR_OUTPUT)
|
||
""" | ||
This function allows to determine the input .rda file and csv file from cellranger vdj. | ||
""" | ||
def int_add_bcr_input(wildcards):
    """Return the rule inputs: the .rda file followed by the BCR csv files.

    The csv files come from the comma-separated 'INT_ADD_BCR_INPUT_CSV_BCR'
    value; duplicates are dropped and the original order is kept.
    """
    info = dic_INT_ADD_BCR_INFO[wildcards.int_add_bcr_output]
    rda_file = info['INT_ADD_BCR_INPUT_RDA']
    csv_files = info['INT_ADD_BCR_INPUT_CSV_BCR'].split(",")
    # dict.fromkeys() removes repeated paths without reordering them.
    return [rda_file, *dict.fromkeys(csv_files)]
|
||
""" | ||
This function allows to determine the singularity binding parameters. | ||
""" | ||
def int_add_bcr_params_sing(wildcards):
    """Build the singularity ``-B`` bind options for this output.

    Every host folder is mounted in the container at the same path prefixed
    with ``/WORKDIR/``. The folders are, in order: the pipeline folder, the
    folder holding the input .rda file (per the original author's note, also
    the output folder) and the folder of each BCR csv file.

    Args:
        wildcards: Snakemake wildcards; ``int_add_bcr_output`` selects the
            entry of ``dic_INT_ADD_BCR_INFO`` to read.

    Returns:
        A string of `` -B host:container`` options, one per distinct host
        folder.
    """
    info = dic_INT_ADD_BCR_INFO[wildcards.int_add_bcr_output]
    host_folders = [PIPELINE_FOLDER, os.path.dirname(info['INT_ADD_BCR_INPUT_RDA'])]
    host_folders.extend(
        os.path.dirname(csv_file)
        for csv_file in info['INT_ADD_BCR_INPUT_CSV_BCR'].split(",")
    )
    # De-duplicate AFTER taking the folder: several csv files may share a
    # folder (or live next to the .rda file), and each bind is needed once.
    return "".join(
        " -B " + folder + ":" + os.path.normpath("/WORKDIR/" + folder)
        for folder in dict.fromkeys(host_folders)
    )
|
||
""" | ||
This function allows to determine the bcr files folders for params. | ||
""" | ||
def int_add_bcr_params_bcr_files(wildcards):
    """Return the BCR csv files as container paths, comma-separated.

    Each distinct entry of 'INT_ADD_BCR_INPUT_CSV_BCR' is prefixed with
    ``/WORKDIR/`` and normalised with ``os.path.normpath``; order is kept.
    """
    raw_files = dic_INT_ADD_BCR_INFO[wildcards.int_add_bcr_output]['INT_ADD_BCR_INPUT_CSV_BCR']
    container_files = []
    for csv_file in dict.fromkeys(raw_files.split(",")):
        container_files.append(os.path.normpath("/WORKDIR/" + csv_file))
    return ",".join(container_files)
|
||
""" | ||
This function allows to determine the output folder for params (os.path.dirname() not allowed in params slot). | ||
""" | ||
def int_add_bcr_params_output_folder(wildcards):
    """Return the container-side output folder, with a trailing slash.

    The folder is the parent directory of the ``int_add_bcr_output`` wildcard,
    re-rooted under ``/WORKDIR/``.
    """
    host_folder = os.path.dirname(wildcards.int_add_bcr_output)
    container_folder = os.path.normpath("/WORKDIR/" + host_folder)
    return container_folder + "/"
|
||
""" | ||
This rule launches the R script to add BCR information to the gene expression analysis. | ||
""" | ||
rule int_add_bcr_ge:
    # Runs Int_Grp_pipeline_BCR.R inside singularity for one integrated output.
    input:
        # int_add_bcr_input() puts the .rda file first, then the csv files.
        int_add_bcr_file = int_add_bcr_input
    output:
        int_add_bcr_rda_file = "{int_add_bcr_output}_BCR.rda"
    params:
        # All paths below are container-side paths rooted at /WORKDIR.
        sing_bind = int_add_bcr_params_sing,
        pipeline_folder = os.path.normpath("/WORKDIR/" + PIPELINE_FOLDER),
        input_rda = lambda wildcards, input: os.path.normpath("/WORKDIR/" + input[0]),
        input_csv = int_add_bcr_params_bcr_files,
        output_folder = int_add_bcr_params_output_folder
    threads: 1
    resources:
        # Both values grow with the attempt number and are capped
        # (20480 for mem_mb, 200 for time_min).
        mem_mb = lambda wildcards, attempt: min(5120 + attempt * 3072, 20480),
        time_min = lambda wildcards, attempt: min(attempt * 120, 200)
    shell:
        """
        singularity exec --no-home {params.sing_bind} \
        {SINGULARITY_ENV_TCR_BCR} \
        Rscript {params.pipeline_folder}/scripts/Int_Grp_pipeline_BCR.R \
        --input.rda {params.input_rda} \
        --output.dir {params.output_folder} \
        --vdj.input.files.bcr {params.input_csv} \
        --author.name {INT_ADD_BCR_AUTHOR_NAME} \
        --author.mail {INT_ADD_BCR_AUTHOR_MAIL} \
        --pipeline.path {params.pipeline_folder}
        """
Oops, something went wrong.