Skip to content

Commit 67d7c04

Browse files
author
sprokopec
committed
fixed dinucleotide profiling for short frags
1 parent b00ed98 commit 67d7c04

File tree

3 files changed

+62
-4
lines changed

3 files changed

+62
-4
lines changed

pughlab_fragmentomics_pipeline.pl

Lines changed: 55 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -694,7 +694,7 @@ sub get_profile_breakpoints_command {
694694
return($bkpt_command);
695695
}
696696

697-
# format command to run dinucleotide profiling
697+
# format command to run dinucleotide profiling on 'normal' size fragments
698698
sub get_dinucleotide_profiling_command {
699699
my %args = (
700700
id => undef,
@@ -747,6 +747,59 @@ sub get_dinucleotide_profiling_command {
747747
return($din_command);
748748
}
749749

750+
# format command to run dinucleotide profiling on 'short' fragments
751+
sub get_dinucleotide_profiling_short_command {
752+
my %args = (
753+
id => undef,
754+
bedpe => undef,
755+
outdir => undef,
756+
@_
757+
);
758+
759+
my $output_stem = join('/', $args{outdir}, $args{id});
760+
761+
# format bedpe to bed
762+
my $din_command = "# convert bedpe to bed";
763+
$din_command .= "\necho '>>> Running dinucleotide_format_bedpe.R <<<'";
764+
$din_command .= "\n\n" . join(' ',
765+
"Rscript $fragmentomics_code_dir/dinucleotide/scripts/dinucleotide_format_bedpe.R",
766+
'--id', $args{id},
767+
'--bedpe', $args{bedpe},
768+
'--outdir', $args{outdir}
769+
);
770+
771+
# get fasta sequences
772+
$din_command .= "\n\n# extract fasta sequences";
773+
$din_command .= "\necho '>>> Running bedtools getfasta <<<'";
774+
$din_command .= "\n\n" . join(' ',
775+
'bedtools getfasta',
776+
'-bedOut -fi', $reference,
777+
'-bed', $output_stem . '.bed',
778+
'>', $output_stem . '_fasta.bed'
779+
);
780+
781+
# convert fasta to end motif context frequencies
782+
$din_command .= "\n\n# run dinucleotide context profiling";
783+
$din_command .= "\necho '>>> Running dinucleotide_get_contexts.R <<<'";
784+
$din_command .= "\n\n" . join(' ',
785+
"Rscript $fragmentomics_code_dir/dinucleotide/scripts/dinucleotide_get_contexts_short.R",
786+
"--id", $args{id},
787+
"--fasta", $output_stem . '_fasta.bed',
788+
"--outdir", $args{outdir}
789+
);
790+
791+
# remove intermediate files
792+
$din_command .= "\n\n# check for completeness and remove intermediates";
793+
$din_command .= "\n" . join("\n",
794+
'if [[ -f ' . $output_stem . '_contexts.txt ]]; then',
795+
" echo '>>> Dinucleotide Profiling completed successfully! <<<'",
796+
' rm ' . $output_stem . '*.bed*',
797+
'fi'
798+
);
799+
800+
return($din_command);
801+
}
802+
750803
### MAIN ###########################################################################################
751804
sub main {
752805
my %args = (
@@ -1628,7 +1681,7 @@ sub main {
16281681
outdir => $di_directory
16291682
);
16301683

1631-
my $dinucleotide_cmd_part2 = get_dinucleotide_profiling_command(
1684+
my $dinucleotide_cmd_part2 = get_dinucleotide_profiling_short_command(
16321685
id => $tumour_stem . '_len150',
16331686
bedpe => $short_bedpe,
16341687
outdir => $di_directory

scripts/fragmentomics/dinucleotide/scripts/dinucleotide_format_bedpe.R

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,13 @@ chrs <- paste0("chr", c(1:22))
3333
bedpe <- bedpe[bedpe$V1 %in% chrs, ]
3434
bedpe <- bedpe[bedpe$V1 == bedpe$V4, ]
3535
bedpe$length <- bedpe$V6 - bedpe$V2
36-
bedpe <- bedpe[bedpe$length == 167, ]
36+
if (grepl('167', id)) {
37+
bedpe <- bedpe[bedpe$length == 167, ];
38+
} else if (grepl('150', id)) {
39+
bedpe <- bedpe[bedpe$length == 150, ];
40+
} else {
41+
warning('Could not determine desired fragment size; using all fragments.');
42+
}
3743
bedpe <- bedpe[, c("V1", "V2", "V6")]
3844
bedpe <- bedpe[order(factor(bedpe$V1, levels = chrs),
3945
bedpe$V2), ]

scripts/fragmentomics/dinucleotide/scripts/dinucleotide_get_contexts.R

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ fasta$V4 <- toupper(fasta$V4)
3434
fasta <- as.data.table(fasta)
3535
fasta <- fasta[!(fasta$V4 %like% "N"), ]
3636
fasta$length <- nchar(fasta$V4)
37-
fasta <- fasta[fasta$length == 267, ]
3837

3938
#fasta <- fasta[1:1000, ] ### Use this for quick tests
4039
# get dinucleotides for each row in fasta

0 commit comments

Comments
 (0)