diff --git a/CHANGES.txt b/CHANGES.txt index f189dfc..6d90571 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -267,3 +267,5 @@ 28082018: bug fixed: get_homologues-pl -X now removes previous DIAMOND results of one vs others when new genomes are added (thanks alexweisberg) 03092018: added section 'Output files explained' to EST manual (thanks Lior Glick) 05092018: updated shebangs of some scripts and added use warnings instead of -w (thanks morgansobol) +10092018: aligned_coords checked before being added to FASTA headers @ get_homologues* (use warnings) +10092018: pfam_hash keys checked before being used (use warnings) diff --git a/get_homologues-est.pl b/get_homologues-est.pl index 5542eae..5148d41 100755 --- a/get_homologues-est.pl +++ b/get_homologues-est.pl @@ -876,7 +876,8 @@ $proteome_size = $psize{$taxa[$taxon]}; # update minimal proteome size - if($min_proteome_size == -1 || $proteome_size < $min_proteome_size) + if((defined($min_proteome_size) && $min_proteome_size== -1) || + (defined($min_proteome_size) && defined($proteome_size) && $proteome_size < $min_proteome_size)) { $min_proteome_size = $proteome_size; $smallest_proteome_name = $taxa[$taxon]; @@ -2310,7 +2311,7 @@ # 4.3.1) print reference gene/protein push(@taxon_names,$gindex2[$gene]); # global set in phyTools:constructAllFasta $header = $sequence_data[$gene]; - if(!$saveRAM) + if(!$saveRAM && defined($aligned_coords{$gene}{'first'})) { chomp($header); $header .= " | aligned:$aligned_coords{$gene}{'first'}-$aligned_coords{$gene}{'last'} ". 
@@ -2812,11 +2813,13 @@ sub split_Pfam_clusters # check number of Pfam domain strings in this cluster my (%Pfam_strings); - $pfam = "$pfam_hash{$cluster}" || ''; + if(defined($pfam_hash{$cluster})){ $pfam = "$pfam_hash{$cluster}" } + else{ $pfam = '' } push(@{$Pfam_strings{$pfam}},$cluster); foreach $orth (@{$ref_hash_orths->{$cluster}}) { - $pfam = "$pfam_hash{$orth}" || ''; + if(defined($pfam_hash{$orth})){ $pfam = "$pfam_hash{$orth}" } + else{ $pfam = '' } push(@{$Pfam_strings{$pfam}},$orth); } #print "<".$#{$ref_hash_orths->{$cluster}}." ".scalar(@{$ref_hash_orths->{$cluster}})."\n"; diff --git a/get_homologues.pl b/get_homologues.pl index 0512a7b..822cd18 100755 --- a/get_homologues.pl +++ b/get_homologues.pl @@ -1120,7 +1120,8 @@ $proteome_size = $psize{$taxa[$taxon]}; # update minimal proteome size - if($min_proteome_size == -1 || $proteome_size < $min_proteome_size) + if((defined($min_proteome_size) && $min_proteome_size== -1) || + (defined($min_proteome_size) && defined($proteome_size) && $proteome_size < $min_proteome_size)) { $min_proteome_size = $proteome_size; $smallest_proteome_name = $taxa[$taxon]; @@ -2645,7 +2646,7 @@ # 4.3.1) print reference gene/protein push(@taxon_names,$gindex2[$gene]); # global set in phyTools:constructAllFasta $header = $sequence_data[$gene]; - if(!$saveRAM) + if(!$saveRAM && defined($aligned_coords{$gene}{'first'})) { chomp($header); $header .= " | aligned:$aligned_coords{$gene}{'first'}-$aligned_coords{$gene}{'last'} ". 
@@ -3203,11 +3204,13 @@ sub split_Pfam_clusters # check number of Pfam domain strings in this cluster my (%Pfam_strings); - $pfam = "$pfam_hash{$cluster}" || ''; + if(defined($pfam_hash{$cluster})){ $pfam = "$pfam_hash{$cluster}" } + else{ $pfam = '' } push(@{$Pfam_strings{$pfam}},$cluster); foreach $orth (@{$ref_hash_orths->{$cluster}}) { - $pfam = "$pfam_hash{$orth}" || ''; + if(defined($pfam_hash{$orth})){ $pfam = "$pfam_hash{$orth}" } + else{ $pfam = '' } push(@{$Pfam_strings{$pfam}},$orth); } #print "<".$#{$ref_hash_orths->{$cluster}}." ".scalar(@{$ref_hash_orths->{$cluster}})."\n";