Skip to content

Commit

Permalink
Merge branch 'release/5.3.0'
Browse files Browse the repository at this point in the history
  • Loading branch information
keiranmraine committed Aug 24, 2020
2 parents cbd089c + 01a61d3 commit 4452c22
Show file tree
Hide file tree
Showing 10 changed files with 66 additions and 22 deletions.
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# CHANGES

## 5.3.0

* Support for paired FASTQ names ending with `_R1_001` and `_R2_001` in addition to `_1` and `_2` as before.

## 5.2.2

* Delete files that may remain after an aborted run before resume.
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ FROM ubuntu:20.04

LABEL maintainer="cgphelp@sanger.ac.uk"\
uk.ac.sanger.cgp="Cancer, Ageing and Somatic Mutation, Wellcome Sanger Institute" \
version="5.2.2" \
version="5.3.0" \
description="pcap-core"

ENV OPT /opt/wtsi-cgp
Expand Down
2 changes: 1 addition & 1 deletion bin/bwa_aln.pl
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ =head2 INPUT FILE TYPES
=item f[ast]q
A standard uncompressed fastq file. Requires a pair of inputs with standard suffix of '_1' and '_2'
immediately prior to '.f[ast]q'.
or 'R1_001' and 'R2_001' immediately prior to '.f[ast]q'.
=item f[ast]q.gz
Expand Down
4 changes: 2 additions & 2 deletions bin/bwa_mem.pl
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,8 @@ =head2 INPUT FILE TYPES
=item f[ast]q
A standard uncompressed fastq file. Requires a pair of inputs with standard suffix of '_1' and '_2'
immediately prior to '.f[ast]q' or an interleaved f[ast]q file where read 1 and 2 are adjacent
in the file.
or 'R1_001' and 'R2_001' immediately prior to '.f[ast]q' or an interleaved f[ast]q file where read 1
and 2 are adjacent in the file.
=item f[ast]q.gz
Expand Down
2 changes: 1 addition & 1 deletion lib/PCAP.pm
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use FindBin qw($Bin);
use File::Which qw(which);
# don't use autodie, only core perl in here

our $VERSION = '5.2.2';
our $VERSION = '5.3.0';
our @EXPORT = qw($VERSION _which);

const my $LICENSE =>
Expand Down
17 changes: 9 additions & 8 deletions lib/PCAP/Bwa.pm
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ sub split_in {
if($input->fastq) {
# paired fq input
if($input->paired_fq) {
my $fq1 = $input->in.'_1.'.$input->fastq;
my $fq2 = $input->in.'_2.'.$input->fastq;
my $fq1 = $input->in.($input->illumina_fq ? '_R1_001.' : '_1.').$input->fastq;
my $fq2 = $input->in.($input->illumina_fq ? '_R2_001.' : '_2.').$input->fastq;
if($input->fastq =~ m/[.]gz$/ || $fragment_size > 5000) {
symlink $fq1, File::Spec->catfile($split_folder, 'pairedfq1.0.'.$input->fastq);
symlink $fq2, File::Spec->catfile($split_folder, 'pairedfq2.0.'.$input->fastq);
Expand Down Expand Up @@ -415,6 +415,7 @@ sub sampe {

my $pathstub = $input_meta->[$index-1]->tstub;
my $fastq = $input_meta->[$index-1]->fastq;
my $illumina_fq = $input_meta->[$index-1]->illumina_fq;


my $command = _which('bwa') || die "Unable to find 'bwa' in path";;
Expand All @@ -425,19 +426,19 @@ sub sampe {
if(defined $fastq) {
$command .= sprintf $ALN_TO_SORTED, $input_meta->[$index-1]->rg_header(q{\t}),
$ref,
$pathstub,
$pathstub,
$input_meta->[$index-1]->in.'_1', $fastq,
$input_meta->[$index-1]->in.'_2', $fastq,
$pathstub.($illumina_fq ? '_R1_001' : '_1'),
$pathstub.($illumina_fq ? '_R2_001' : '_2'),
$input_meta->[$index-1]->in.($illumina_fq ? '_R1_001' : '_1'), $fastq,
$input_meta->[$index-1]->in.($illumina_fq ? '_R2_001' : '_2'), $fastq,
$bamsort,
$pathstub,
$pathstub;
}
else {
$command .= sprintf $ALN_TO_SORTED, $input_meta->[$index-1]->rg_header(q{\t}),
$ref,
$pathstub,
$pathstub,
$pathstub.'_1',
$pathstub.'_2',
$pathstub, 'bam',
$pathstub, 'bam',
$bamsort,
Expand Down
28 changes: 19 additions & 9 deletions lib/PCAP/Bwa/Meta.pm
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ use YAML qw(LoadFile);

use PCAP::Bam;

const my @INIT_KEYS => qw(in temp fastq paired_fq cram bam);
const my @INIT_KEYS => qw(in temp fastq paired_fq illumina_fq cram bam);
const my @REQUIRED_KEYS => qw(in temp);
const my @REQUIRED_RG_ELEMENTS => qw(SM);
const my @ALLOWED_RG_ELEMENTS => qw(ID CN DS DT FO KS LB PG PI PL PM PU SM);
Expand Down Expand Up @@ -98,6 +98,12 @@ sub paired_fq {
return $self->{'paired_fq'}; # this will create a key but not worth adding overhead to prevent
}

sub illumina_fq {
my $self = shift;
croak "'illumina_fq' can only be set via new()" if(scalar @_ > 0);
return $self->{'illumina_fq'};
}

sub bam_or_cram {
my $self = shift;
if(exists $self->{'bam'} && defined $self->{'bam'} && $self->{'bam'} == 1) {
Expand Down Expand Up @@ -271,10 +277,11 @@ sub files_to_meta {
# must be paired fq
next if(exists $seen_paired_stub{$fq_stub});
$seen_paired_stub{$fq_stub} = 1;
die "Unable to find file for read 1, for ${fq_stub}_X.${fq_ext}\n" unless(-e "${fq_stub}_1.$fq_ext");
die "Unable to find file for read 2, for ${fq_stub}_X.${fq_ext}\n" unless(-e "${fq_stub}_2.$fq_ext");
die "File for read 1 is empty: ${fq_stub}_X.${fq_ext}\n" unless(-s "${fq_stub}_1.$fq_ext");
die "File for read 2 is empty: ${fq_stub}_X.${fq_ext}\n" unless(-s "${fq_stub}_2.$fq_ext");
die "Unable to find file for read 1, for ${fq_stub}_X.${fq_ext}\n" unless(-e "${fq_stub}_1.$fq_ext" || -e "${fq_stub}_R1_001.$fq_ext");
die "Unable to find file for read 2, for ${fq_stub}_X.${fq_ext}\n" unless(-e "${fq_stub}_2.$fq_ext" || -e "${fq_stub}_R2_001.$fq_ext");
die "File for read 1 is empty: ${fq_stub}_X.${fq_ext}\n" unless(-s "${fq_stub}_1.$fq_ext" || -s "${fq_stub}_R1_001.$fq_ext");
die "File for read 2 is empty: ${fq_stub}_X.${fq_ext}\n" unless(-s "${fq_stub}_2.$fq_ext" || -s "${fq_stub}_R2_001.$fq_ext");
$meta->{'illumina_fq'} = ( $end=~m/R[12]_001/ ? 1 : 0 );
$meta->{'paired_fq'} = 1;
$are_paired_fq = 1;
}
Expand Down Expand Up @@ -343,7 +350,7 @@ sub is_fastq_ext {
sub parse_fastq_filename {
my $fastq = shift; # shouldn't have extension by now
my $end;
if($fastq =~ s/_([12])$//) {
if($fastq =~ s/_([12]|R[12]_001)$//) {
$end = $1;
}
return ($fastq, $end);
Expand Down Expand Up @@ -419,9 +426,9 @@ Added for future compatibility with files that can be used with 'BWA mem'.
When input is some form of fastq this returns the extension, otherwise undef.
=item paired_fq
=item illumina_fq
When input is a paired fastq file this returns true.
If paired FASTQ names end with _R[12]_001, this returns true. Returns false if they end with _[12].
=item rg
Expand Down Expand Up @@ -488,6 +495,9 @@ Understands the following as fastq files:
Takes a fastq filename after removal of the extension (see L<is_fastq_ext()/PCAP::Bwa::Meta/is_fastq_ext>
and determines if file is read1, read2 or interleaved fastq.
Expects file to end '_1' or '_2' for paired, assumed interleaved otherwise.
Expects paired FASTQ names to end with one the following. Assumes interleaved FASTQ otherwise.
_1 and _2 - PCAP's original naming requirement
_R1_001 and _R2_001 - Illumina's naming requirement; Require 3 digit suffix to be 001 to avoid split FASTQs
=back
4 changes: 4 additions & 0 deletions t/data/4_R1_001.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@HS1_10761:8:1312:1589:61870#79/1
CTAACCCTAACCCTAACCCTAACCCTAAACCCTAAACCCTAAACCCTAAACCCTAACCCTAACCCTAACCCTAAC
+
ECEFEGFFDBHHEFCHFGGGGHHHFFGGGGGGDHIDGFFFEGEFH;FGECGGHGDHHAEEEBH:FADEIEEFAED
4 changes: 4 additions & 0 deletions t/data/4_R2_001.fq
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
@HS1_10761:8:1312:1589:61870#79/2
GGGTTAGGGTTAGGGTTAAGGGTTAGGGGTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTAGGGTTTAG
+
>:B>EFFFC7FCHGF5HDHGFH5G4HFC4GFGGF(FFDHB>BFHCF3EEGE?3EGHEE333@GH*CBG;=(ABDC
21 changes: 21 additions & 0 deletions t/pcapBwaMeta.t
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,12 @@ subtest 'Non-object funcions' => sub {
($base, $end) = PCAP::Bwa::Meta::parse_fastq_filename('fastq_2');
is($base, 'fastq', "fastq_2 file should have '_2' removed");
is($end, '2', "Read end should be 2");
($base, $end) = PCAP::Bwa::Meta::parse_fastq_filename('fastq_R1_001');
is($base, 'fastq', "fastq_R1_001 has a basename of 'fastq'");
is($end, 'R1_001', "file ending with R1_001 indicates read 1 of pair");
($base, $end) = PCAP::Bwa::Meta::parse_fastq_filename('fastq_R2_001');
is($base, 'fastq', "fastq_R2_001 has a basename of 'fastq'");
is($end, 'R2_001', "file ending with R2_001 indicates read 2 of a pair");
($base, $end) = PCAP::Bwa::Meta::parse_fastq_filename('fastq_3');
isnt($base, 'fastq', "fastq_3 file should be unchanged, assume interleaved");
ok(!defined $end, "_3, so not defined");
Expand Down Expand Up @@ -134,6 +140,9 @@ subtest 'Objects from file list' => sub {
is(PCAP::Bwa::Meta::files_to_meta($tmp, [ File::Spec->catfile($test_data, '1_1.fq')
, File::Spec->catfile($test_data, '1_2.fq')], 'sample')->[0]->{'paired_fq'}
, 1, 'Paired fq identified as paired');
is(PCAP::Bwa::Meta::files_to_meta($tmp, [ File::Spec->catfile($test_data, '4_R1_001.fq')
, File::Spec->catfile($test_data, '4_R2_001.fq')], 'sample')->[0]->{'paired_fq'}
, 1, 'Paired fastqs with Illumina names identified as paired');
like( exception{ PCAP::Bwa::Meta::files_to_meta($tmp
, [ File::Spec->catfile($test_data, '2_1.fq')
, File::Spec->catfile($test_data, '2_2.fq')]
Expand Down Expand Up @@ -173,6 +182,13 @@ subtest 'Objects from file list' => sub {
, 'sample') }
, qq{ERROR: BAM|CRAM, paired FASTQ and interleaved FASTQ file types cannot be mixed, please choose one type\n}
, 'Fail when inputs are mixed file types');
is( exception{ PCAP::Bwa::Meta::files_to_meta($tmp
, [ File::Spec->catfile($test_data, '4_R1_001.fq')
, File::Spec->catfile($test_data, '4_R2_001.fq')
, File::Spec->catfile($test_data, '1.fq')]
, 'sample') }
, qq{ERROR: BAM|CRAM, paired FASTQ and interleaved FASTQ file types cannot be mixed, please choose one type\n}
, 'Fail when inputs mix interleaves with paired per illumina');
like( exception{ PCAP::Bwa::Meta::files_to_meta($tmp, [ File::Spec->catfile($test_data, 'wibble.wobble')]
, 'sample') }
, qr/.+ is not an expected input file type.\n/m
Expand Down Expand Up @@ -248,6 +264,11 @@ subtest 'Accessors' => sub {
, qr/'paired_fq' can only be set via new\(\)/
, 'Fail to directly set paired_fq');

is($meta->illumina_fq, 0, 'Expected non illumina fastq names');
like( exception { $meta->illumina_fq(1) }
, qr/'illumina_fq' can only be set via new\(\)/
, 'Fail to directly set illumina_fq');

my $tstub = File::Spec->catfile($tmp, '2'); # 2 as files_to_meta called twice

is($meta->tstub, $tstub, 'Generate expected tstub');
Expand Down

0 comments on commit 4452c22

Please sign in to comment.