Skip to content

Commit

Permalink
Merge branch 'release/v1.12'
Browse files Browse the repository at this point in the history
  • Loading branch information
keiranmraine committed Feb 4, 2016
2 parents 536af6c + 7dfc4ee commit cfcde01
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 118 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@
bin/bam_stats
c/*.o
examples/cgp_gnos_pull.ini
/MYMETA.*
15 changes: 7 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,10 @@ Please see the [wiki](https://github.com/ICGC-TCGA-PanCancer/PCAP-core/wiki) for

####Cutting the release
1. Update `lib/PCAP.pm` to the correct version.
2. Update `c/Makefile` to the correct version.
3. Ensure upgrade path for new version number is added to `lib/PCAP.pm`.
4. Update `CHANGES.md` to show major items.
5. Run `./prerelease.sh`
6. Check all tests and coverage reports are acceptable.
7. Commit the updated docs tree and updated module/version.
8. Push commits.
9. Use the GitHub tools to draft a release.
2. Ensure upgrade path for new version number is added to `lib/PCAP.pm`.
3. Update `CHANGES.md` to show major items.
4. Run `./prerelease.sh`
5. Check all tests and coverage reports are acceptable.
6. Commit the updated docs tree and updated module/version.
7. Push commits.
8. Use the GitHub tools to draft a release.
7 changes: 4 additions & 3 deletions c/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
VERSION=1.10.0
#VERSION=1.10.0
VERSION=$(shell perl -I../lib -MPCAP -e 'print PCAP->VERSION;')

#Compiler
CC = gcc -O3 -DVERSION='"$(VERSION)"' -g
Expand Down Expand Up @@ -99,9 +100,9 @@ valgrind:
.c.o:
$(CC) $(CFLAGS) $(INCLUDES) -c $< -o $@

clean:
clean:
@echo clean
$(RM) ./*.o *~ $(BAM_STATS_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./tests/*.gcda ./tests/*.gcov ./tests/*.gcno
$(RM) ./*.o *~ $(BAM_STATS_TARGET) ./tests/tests_log $(TESTS) ./*.gcda ./*.gcov ./*.gcno *.gcda *.gcov *.gcno ./tests/*.gcda ./tests/*.gcov ./tests/*.gcno
-rm -rf $(HTSTMP)


Expand Down
135 changes: 39 additions & 96 deletions c/bam_access.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,50 +30,26 @@ int get_rg_index_from_rg_store(rg_info_t **grps, char *rg, int grps_size){
return -1;
}

void parse_rg_line(char *tmp_line, const int idx, rg_info_t **groups,
char *id, char *sm, char *pl, char *pu, char *lib){
void parse_rg_line(char *tmp_line, rg_info_t *group) {
//Now tokenise tmp_line on \t and read in
char *tag = strtok(tmp_line,"\t");
assert(strcmp(tag,"@RG")==0);
group->id = strdup("\0");
group->sample = strdup("\0");
group->platform = strdup("\0");
group->platform_unit = strdup("\0");
group->lib = strdup("\0");
tag = strtok(NULL,"\t");
while(tag != NULL){
int chk = sscanf(tag,"ID:%[^\t\n]",id);
if(chk == 1){
groups[idx]->id = (char *) malloc(sizeof(char) * 1000);
strcpy(groups[idx]->id,id);
tag = strtok(NULL,"\t");
continue;
}
chk=0;
chk = sscanf(tag,"SM:%[^\t\n]",sm);
if(chk == 1){
groups[idx]->sample = (char *) malloc(sizeof(char) * 1000);
strcpy(groups[idx]->sample,sm);
tag = strtok(NULL,"\t");
continue;
}
chk = 0;
chk = sscanf(tag,"PL:%[^\t\n]",pl);
if(chk == 1){
groups[idx]->platform = (char *) malloc(sizeof(char) * 1000);
strcpy(groups[idx]->platform,pl);
tag = strtok(NULL,"\t");
continue;
}
chk = 0;
chk = sscanf(tag,"PU:%[^\t\n]",pu);
if(chk == 1){
groups[idx]->platform_unit = (char *) malloc(sizeof(char) * 1000);
strcpy(groups[idx]->platform_unit,pu);
tag = strtok(NULL,"\t");
continue;
}
chk = 0;
chk = sscanf(tag,"LB:%[^\t\n]",lib);
if(chk == 1){
groups[idx]->lib = (char *) malloc(sizeof(char) * 1000);
strcpy(groups[idx]->lib,lib);
tag = strtok(NULL,"\t");
continue;
}
assert(tag[2]==':');
tag[2]=0;
char *val = tag+3;
if (strcmp("ID",tag)==0) group->id = strdup(val);
if (strlen(val)==0) val = ".";
if (strcmp("SM",tag)==0) group->sample = strdup(val);
if (strcmp("PL",tag)==0) group->platform = strdup(val);
if (strcmp("PU",tag)==0) group->platform_unit = strdup(val);
if (strcmp("LB",tag)==0) group->lib = strdup(val);
tag = strtok(NULL,"\t");
}//End of iterating through tags in this RG tmp_line
return;
Expand All @@ -85,8 +61,7 @@ rg_info_t **parse_header(bam_hdr_t *head, int *grps_size, stats_rd_t ****grp_sta
rg_info_t **groups;
int size = 0;
char *head_txt = head->text;
char *head_bac = malloc(sizeof(char) * (strlen(head_txt)+1));
strncpy(head_bac,head_txt,strlen(head_txt)+1);
char *head_bac = strdup(head_txt);
//First pass counts read groups
line = strtok(head_txt,"\n");
while(line != NULL){
Expand All @@ -100,73 +75,41 @@ rg_info_t **parse_header(bam_hdr_t *head, int *grps_size, stats_rd_t ****grp_sta
//We now have the number of read groups, assign the RG id to each.
groups = (rg_info_t**) malloc(sizeof(rg_info_t *) * size);
check_mem(groups);
char ** ptr = malloc(sizeof(char **));
check_mem(ptr);
line = strtok_r(head_bac,"\n",ptr);
char *ptr = NULL;
line = strtok_r(head_bac,"\n",&ptr);
int idx = 0;
while(line != NULL){
//Check for a read group line
if(strncmp(line,"@RG",3)==0){
groups[idx] = (rg_info_t *) malloc(sizeof(rg_info_t));
check_mem(groups[idx]);
char *id = malloc(sizeof(char) * 1000);
check_mem(id);
id[0] = '\0';
char *sm = malloc(sizeof(char) * 1000);
check_mem(sm);
sm[0] = '\0';
char *pl = malloc(sizeof(char) * 1000);
check_mem(pl);
pl[0] = '\0';
char *pu = malloc(sizeof(char) * 1000);
check_mem(pu);
pu[0] = '\0';
char *lib = malloc(sizeof(char) * 1000);
check_mem(lib);
lib[0] = '\0';
char * tmp = malloc(sizeof(char) * (strlen(line)+1));
strcpy(tmp,line);
parse_rg_line(tmp,idx,groups,id,sm,pl,pu,lib);
check((id != NULL),"Error recognising ID from RG line. NULL found.");
check((strcmp(id,"")!=0),"Error recognising ID from RG line. Empty string.");
check((id[0]!='\0'),"Error recognising ID from RG line. Empty string.");
check((sm != NULL),"Error recognising SM from RG line.");
if((sm[0]=='\0')){
groups[idx]->sample = ".";
}
check(pl != NULL,"Error recognising PL from RG line.");
if((pl[0]=='\0')){
groups[idx]->platform = ".";
}
check(lib != NULL,"Error recognising LB from RG line.");
if((lib[0]=='\0')){
groups[idx]->lib = ".";
}
check(pu != NULL,"Error recognising PU from RG line.");
if((pu[0]=='\0')){
groups[idx]->platform_unit = ".";
}
char * tmp = strdup(line);
parse_rg_line(tmp,groups[idx]);
free (tmp);

check((groups[idx]->id != NULL),"Error recognising ID from RG line. NULL found.");
check((groups[idx]->id[0]!='\0'),"Error recognising ID from RG line. Empty string.");
check((groups[idx]->sample != NULL),"Error recognising SM from RG line.");
if(groups[idx]->sample[0] == '\0') groups[idx]->sample = strdup(".");
check(groups[idx]->platform != NULL,"Error recognising PL from RG line.");
if(groups[idx]->platform[0] == '\0') groups[idx]->platform = strdup(".");
check(groups[idx]->lib != NULL,"Error recognising LB from RG line.");
if(groups[idx]->lib[0] == '\0') groups[idx]->lib = strdup(".");
check(groups[idx]->platform_unit != NULL,"Error recognising PU from RG line.");
if(groups[idx]->platform_unit[0] == '\0') groups[idx]->platform_unit = strdup(".");
idx++;
}//End of iteration through header lines.
line = strtok_r(NULL,"\n",ptr);
line = strtok_r(NULL,"\n",&ptr);
}
free(ptr);
if(line) free(line);
}else{ //Deal with a possible lack of @RG lines.
groups = malloc(sizeof(rg_info_t*) * 1);
check_mem(groups);
groups[0]->id = malloc(sizeof(char*) * 100);
groups[0]->sample = malloc(sizeof(char) * 100);
groups[0]->platform = malloc(sizeof(char) * 100);
groups[0]->platform_unit = malloc(sizeof(char) * 100);
groups[0]->lib = malloc(sizeof(char) * 100);

groups[0]->id = ".";
groups[0]->sample = ".";
groups[0]->platform = ".";
groups[0]->platform_unit = ".";
groups[0]->lib = ".";
groups[0]->id = strdup(".");
groups[0]->sample = strdup(".");
groups[0]->platform = strdup(".");
groups[0]->platform_unit = strdup(".");
groups[0]->lib = strdup(".");
size = 1;
}
*grp_stats = (stats_rd_t***) malloc(sizeof(stats_rd_t**) * (size));
Expand Down
18 changes: 9 additions & 9 deletions c/bam_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@
#include "khash.h"

static char *input_file = NULL;
static char *output_file;
static char *ref_file;
static char *output_file = NULL;
static char *ref_file = NULL;
static int rna = 0;
int grps_size = 0;
stats_rd_t*** grp_stats;
static char *bas_header = "bam_filename\tsample\tplatform\tplatform_unit\tlibrary\treadgroup\tread_length_r1\tread_length_r2\t#_mapped_bases\t#_mapped_bases_r1\t#_mapped_bases_r2\t#_divergent_bases\t#_divergent_bases_r1\t#_divergent_bases_r2\t#_total_reads\t#_total_reads_r1\t#_total_reads_r2\t#_mapped_reads\t#_mapped_reads_r1\t#_mapped_reads_r2\t#_mapped_reads_properly_paired\t#_gc_bases_r1\t#_gc_bases_r2\tmean_insert_size\tinsert_size_sd\tmedian_insert_size\t#_duplicate_reads\n";
static char *rg_line_pattern = "%s\t%s\t%s\t%s\t%s\t%s\t%ld\t%ld\t%lld\t%lld\t%lld\t%lld\t%lld\t%lld\t%lld\t%lld\t%lld\t%lld\t%lld\t%lld\t%lld\t%lld\t%lld\t%.3f\t%.3f\t%.3f\t%lld\n";
static char *rg_line_pattern = "%s\t%s\t%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%d\t%.3f\t%.3f\t%.3f\t%d\n";


int check_exist(char *fname){
Expand All @@ -52,7 +52,7 @@ int check_exist(char *fname){

void print_version (int exit_code){
printf ("%s\n",VERSION);
return;
exit(exit_code);
}

void print_usage (int exit_code){
Expand All @@ -64,7 +64,7 @@ void print_usage (int exit_code){
printf ("-r --ref-file File path to reference index (.fai) file.\n");
printf (" NB. If cram format is supplied via -b and the reference listed in the cram header can't be found bam_stats may fail to work correctly.\n");
printf ("-a --rna Uses the RNA method of calculating insert size (ignores anything outside ± ('sd'*standard_dev) of the mean in calculating a new mean)\n");

printf ("Other:\n");
printf ("-h --help Display this usage information.\n");
printf ("-v --version Prints the version number.\n\n");
Expand Down Expand Up @@ -104,11 +104,11 @@ void options(int argc, char *argv[]){
case 'r':
ref_file = optarg;
break;

case 'a':
rna = 1;
break;

case 'h':
print_usage(0);
break;
Expand Down Expand Up @@ -197,15 +197,15 @@ int calculate_mean_sd_median_insert_size(khash_t(ins) *inserts,double *mean, dou
tt_sd += val;
});



if(tt_sd){
double variance = fabs((double)((double)pp_sd / (double)tt_sd));
*sd = sqrt(variance);
}else{
*sd = 0;
}

kh_destroy(ins, inserts);

} //End of if we have data to calculate from.
Expand Down
Binary file modified docs.tar.gz
Binary file not shown.
3 changes: 2 additions & 1 deletion lib/PCAP.pm
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use strict;
use Const::Fast qw(const);
use base 'Exporter';

our $VERSION = '1.11.1';
our $VERSION = '1.12.0';
our @EXPORT = qw($VERSION);

const my $LICENSE =>
Expand Down Expand Up @@ -74,6 +74,7 @@ const my %UPGRADE_PATH => ( '0.1.0' => 'biobambam,bwa,samtools',
'1.10.0' => 'biobambam',
'1.11.0' => 'biobambam',
'1.11.1' => 'biobambam',
'1.12.0' => 'biobambam',
);

sub license {
Expand Down
8 changes: 7 additions & 1 deletion lib/PCAP/Bam.pm
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ sub _which {
}

sub sample_name {
my $bam = shift;
my ($bam, $die_no_sample) = @_;
my $sam = sam_ob($bam);
my $header = $sam->header->text;
my $sample;
Expand All @@ -168,6 +168,12 @@ sub sample_name {
die "BAM file appears to contain data for multiple samples, not supported: \n\n$header\n" if(defined $sample && $sample ne $new_sample);
$sample = $new_sample;
}
unless(defined $sample) {
if(defined $die_no_sample && $die_no_sample != 0) {
die "ERROR: Failed to find samplename in RG headers of $bam";
}
warn "WARN: Failed to find samplename in RG headers of $bam\n";
}
return ($sample, $sam); # also return the SAM object
}

Expand Down
6 changes: 6 additions & 0 deletions t/pcapBam.t
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use strict;
use Test::More;
use Test::Fatal;
use Test::Warn;
use File::Spec;
use Try::Tiny qw(try catch finally);
use Const::Fast qw(const);
Expand Down Expand Up @@ -123,6 +124,11 @@ subtest 'header_info checks' => sub {
, qr/ERROR: This BAM has no readgroups:/m
, 'Fail when no read groups found in header');

warning_like {($sample, $bio_db_sam) = PCAP::Bam::sample_name($no_rg_bam);} qr/WARN: Failed to find samplename in RG headers of /, "Warn when no sample found";

like(exception{ PCAP::Bam::sample_name($no_rg_bam, 1) }
, qr/ERROR: Failed to find samplename in RG headers of /m
, 'Die when no sample found (and die flag set)');
};

done_testing();

0 comments on commit cfcde01

Please sign in to comment.