diff --git a/perl/MANIFEST b/perl/MANIFEST
index 29e4466..eba55be 100644
--- a/perl/MANIFEST
+++ b/perl/MANIFEST
@@ -4,6 +4,7 @@ bin/brassI_filter.pl
 bin/brassI_np_in.pl
 bin/brassI_pre_filter.pl
 bin/brassI_prep_bam.pl
+bin/combineResults.pl
 bin/findbp
 bin/findbp-all
 bin/make-repeat-file
@@ -21,6 +22,7 @@ lib/Graph/Reader/Velvet.pm
 lib/Sanger/CGP/Brass/Implement.pm
 lib/Sanger/CGP/BrassFilter/BlatFlag.pm
 lib/Sanger/CGP/BrassFilter/BrassMarkedGroups.pm
+lib/Sanger/CGP/BrassFilter/Cleanup.pm
 lib/Sanger/CGP/BrassFilter/CnFlag.pm
 lib/Sanger/CGP/BrassFilter/OccursFlag.pm
 lib/Sanger/CGP/BrassFilter/TransFlag.pm
diff --git a/perl/MYMETA.json b/perl/MYMETA.json
index edc170a..83b2e51 100644
--- a/perl/MYMETA.json
+++ b/perl/MYMETA.json
@@ -43,5 +43,5 @@
       }
    },
    "release_status" : "stable",
-   "version" : "v2.2.0"
+   "version" : "v3.0.0"
 }
diff --git a/perl/MYMETA.yml b/perl/MYMETA.yml
index 7e754e1..7b60b28 100644
--- a/perl/MYMETA.yml
+++ b/perl/MYMETA.yml
@@ -25,4 +25,4 @@ requires:
   Pod::Coverage: '0.23'
   Template: '2.26'
   Test::Fatal: '0.013'
-version: v2.2.0
+version: v3.0.0
diff --git a/perl/Makefile.PL b/perl/Makefile.PL
index efac3c1..549fd1c 100755
--- a/perl/Makefile.PL
+++ b/perl/Makefile.PL
@@ -45,6 +45,7 @@ WriteMakefile(
                         bin/brassI_pre_filter.pl
                         bin/brassI_prep_bam.pl
                         bin/brass.pl
+                        bin/combineResults.pl
                         bin/findbp
                         bin/findbp-all
                         bin/make-repeat-file
diff --git a/perl/bin/brass.pl b/perl/bin/brass.pl
index c2836b0..a7137bc 100755
--- a/perl/bin/brass.pl
+++ b/perl/bin/brass.pl
@@ -97,14 +97,20 @@
 sub cleanup {
   my $options = shift;
   my $tmpdir = $options->{'tmp'};
-  system("cp $tmpdir/*.brm.bam $options->{outdir}/.");
-  move(File::Spec->catdir($tmpdir, 'logs'), File::Spec->catdir($options->{'outdir'}, 'logs')) || die $!;
   my $basefile = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'});
 
   unlink "$basefile.groups";
+  unlink "$basefile.groups.filtered.bedpe";
   unlink "$basefile.assembled.bedpe";
+  unlink $basefile.'_ann.assembled.vcf';
+  unlink $basefile.'_ann.assembled.bedpe';
   unlink "$basefile.annot.vcf";
   unlink "$basefile.annot.vcf.srt";
+  unlink $basefile.'_ann.groups.filtered.vcf';
+  unlink $basefile.'_ann.groups.filtered.bedpe';
+
+  system("cp $tmpdir/*.brm.bam $options->{outdir}/.");
+  move(File::Spec->catdir($tmpdir, 'logs'), File::Spec->catdir($options->{'outdir'}, 'logs')) || die $!;
 
   remove_tree $tmpdir if(-e $tmpdir);
 	return 0;
diff --git a/perl/bin/brassI_filter.pl b/perl/bin/brassI_filter.pl
index cf1d085..73fc3bb 100755
--- a/perl/bin/brassI_filter.pl
+++ b/perl/bin/brassI_filter.pl
@@ -39,12 +39,14 @@
 
 use strict;
 use warnings FATAL => 'all';
+use File::Copy qw(move);
 
 use Sanger::CGP::BrassFilter::BrassMarkedGroups;
 use Sanger::CGP::BrassFilter::TransFlag;
 use Sanger::CGP::BrassFilter::OccursFlag;
 use Sanger::CGP::BrassFilter::CnFlag;
 use Sanger::CGP::BrassFilter::BlatFlag;
+use Sanger::CGP::BrassFilter::Cleanup;
 
 use File::Which;
 use Getopt::Long;
@@ -163,7 +165,10 @@
 if ($rblat_only) { $do_process = 0; $do_trans = 0; $do_occurrences = 0; $do_copynumber = 0; $do_range_blat = 1; }
 
 # process core information and print to outfile
-if ($do_process) { process($infile, $outfile, $tumour); }
+if ($do_process) {
+  process($infile, $outfile, $tumour);
+  cleanup($outfile);
+}
 
 # check for translocations
 if ($do_trans) { update_translocations($outfile, $bal_trans_field, $inv_field, $bal_distance, $inv_distance); }
@@ -177,7 +182,6 @@
 # check for L v H range blat scores
 if ($do_range_blat)  { update_blat($outfile, $blat_field, $blat_script, $ref, $minIdentity); }
 
-#-----------------------------------------------------------------------------#
 #-----------------------------------------------------------------------------#
 sub process {
     my ($infile, $outfile, $tumour) = @_;
@@ -205,6 +209,19 @@ sub process {
 	print "finished printing to $outfile at $date";
     }
 }
+
+#-----------------------------------------------------------------------------#
+sub cleanup {
+  my ($outfile) = @_;
+  $outfile = "$outfile.bedpe" unless($outfile =~ m/\.bedpe$/);
+  my $new_out = "$outfile.tmp";
+
+  my $cleanup = Sanger::CGP::BrassFilter::Cleanup->new(-infile  => $outfile,
+						                                      -outfile => $new_out);
+	$cleanup->process();
+  move($new_out, $outfile) || die $!;
+}
+
 #------------------------------------------------------------------------------------------------#
 
 sub update_translocations {
diff --git a/perl/bin/combineResults.pl b/perl/bin/combineResults.pl
new file mode 100755
index 0000000..8d1893e
--- /dev/null
+++ b/perl/bin/combineResults.pl
@@ -0,0 +1,210 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings FATAL => 'all';
+use autodie;
+use Const::Fast qw(const);
+
+const my @NEW_BEDPE_HEADER => ('# chr1','start1','end1','chr2','start2','end2','id/name','brass_score','assembly_score','strand1','strand2','sample','readpair names','readpair count','bal_trans','inv','occL','occH','copynumber_flag','range_blat','Brass Notation','non-template','micro-homology','assembled readnames','assembled read count','gene','gene_id','transcript_id','strand','end_phase','region','region_number','total_region_count','first/last','gene','gene_id','transcript_id','strand','phase','region','region_number','total_region_count','first/last','fusion_flag');
+
+if(scalar @ARGV != 3) {
+  warn "USAGE: combineResults.pl X_ann.groups X_ann.assembled X.final\n";
+  warn "\tX.final is a prefix, relevant suffixes will be added for VCF and BEDPE outputs\n";
+  exit 1;
+}
+my ($groups_prefix, $assembled_prefix, $final_prefix) = @ARGV;
+
+
+mergeVcf($groups_prefix, $assembled_prefix, $final_prefix);
+mergeBedpe($groups_prefix, $assembled_prefix, $final_prefix);
+
+sub mergeVcf {
+  my ($groups_prefix, $assembled_prefix, $final_prefix) = @_;
+
+  my $assembled_vcf = "$assembled_prefix.vcf";
+  my $phaseI_vcf = "$groups_prefix.vcf";
+
+  my $final_vcf = "$final_prefix.vcf";
+
+  my %orig_data;
+  open my $ORIG, '<', $assembled_vcf;
+  while(my $line = <$ORIG>) {
+    next if($line =~ m/^#/);
+    chomp $line;
+    my @bits = split /\t/, $line;
+    my $id = $bits[2];
+    $orig_data{$id} = \@bits;
+  }
+  close $ORIG;
+
+  my $new_info = q{##INFO=<ID=BAS,Number=.,Type=Integer,Description="Brass Assembly Score:A maximum score of 100 indicates a perfect pattern of 5 vertices in Velvet's de Bruijn graph">}
+                  .qq{\n}
+                  .q{##INFO=<ID=SVCLASS,Number=.,Type=String,Description="basic SV class, deletion, inversion, tandem-duplication">};
+  my $new_format = q{##FORMAT=<ID=PS,Number=1,Type=Integer,Description="Count of pairs that span this breakpoint">};
+
+  my $info_found = 0;
+  my $format_found = 0;
+
+  open my $FINAL, '>', $final_vcf;
+  open my $FIXED, '<', $phaseI_vcf;
+
+  my @ends;
+
+  while(my $line = <$FIXED>) {
+    if(@ends == 2) {
+      svclass(\@ends);
+      print $FINAL join("\t",@{$ends[0]}), "\n";
+      print $FINAL join("\t",@{$ends[1]}), "\n";
+      @ends = ();
+    }
+    if($line =~ m/^#/) {
+      if($info_found == 0 && $line =~ m/^##INFO/) {
+        print $FINAL $new_info,"\n";
+        $info_found = 1;
+      }
+      if($format_found == 0 && $line =~ m/^##FORMAT/) {
+        print $FINAL $new_format,"\n";
+        $format_found = 1;
+      }
+      print $FINAL $line;
+      next;
+    }
+    chomp $line;
+    my @bits = split /\t/, $line;
+    my $id = $bits[2];
+    if(!exists $orig_data{$id}) {
+      $bits[-3] .= ':PS';
+      $bits[-2] = '0:'.$bits[-2]; # normal is always 0:0
+      $bits[-1] = '0:'.$bits[-1];
+
+      push @ends, \@bits;
+      next;
+    }
+    else {
+      my $old_info = $orig_data{$id}->[7];
+      my ($tsrds) = $bits[7] =~ m/(TSRDS=[^;]+)/;
+
+      $bits[7] = $old_info.';'.$tsrds;
+      $bits[7] .= ';BAS='.$orig_data{$id}->[5];
+      $bits[5] = q{.};
+      $bits[-3] .= ':PS';
+      $bits[-2] = '0:'.$bits[-2]; # normal is always 0:0
+      $bits[-1] = $orig_data{$id}->[-1].':'.$bits[-1];
+
+      $bits[7] =~ s/;;/;/g;
+
+      push @ends, \@bits;
+      next;
+    }
+  }
+  if(@ends == 2) {
+    svclass(\@ends);
+    print $FINAL join("\t",@{$ends[0]}), "\n";
+    print $FINAL join("\t",@{$ends[1]}), "\n";
+  }
+
+  close $FIXED;
+  close $FINAL;
+}
+
+sub mergeBedpe {
+  my ($groups_prefix, $assembled_prefix, $final_prefix) = @_;
+  my $assembled_bedpe = "$assembled_prefix.bedpe";
+  my $phaseI_bedpe= "$groups_prefix.bedpe";
+  my $final_bedpe = "$final_prefix.bedpe";
+
+  my %orig_data;
+  open my $ORIG, '<', $assembled_bedpe;
+  while(my $line = <$ORIG>) {
+    next if($line =~ m/^#/);
+    chomp $line;
+    my @bits = split /\t/, $line;
+    my $id = $bits[6];
+    $orig_data{$id} = \@bits;
+  }
+  close $ORIG;
+
+  open my $FINAL, '>', $final_bedpe;
+  open my $FIXED, '<', $phaseI_bedpe;
+
+  print $FINAL join("\t", @NEW_BEDPE_HEADER),"\n";
+
+  while(my $line = <$FIXED>) {
+    next if($line =~ m/^#/);
+    chomp $line;
+    my @bits = split /\t/, $line;
+    my $id = $bits[6];
+    if(index($id, ',') != -1) {
+      my @ids = split /,/, $id;
+      my @multi_record;
+      for(@ids) {
+        push @multi_record, $orig_data{$id} if(exists $orig_data{$id});
+      }
+      if(scalar @multi_record) {
+        warn "MULTI\n";
+        warn "$line\n";
+        warn Dumper(\@multi_record);
+      }
+      die "merged events correlate with assembled results, don't know what to do";
+    }
+    elsif(!exists $orig_data{$id}) {
+      my @new;
+      push @new, @bits[0..7]; # 1-8
+      push @new, q{_}; # 9
+      push @new, @bits[8..9]; # 10-11
+      push @new, $bits[16]; # 12
+      push @new, @bits[18..25]; # 13-20
+      push @new, q{_},q{_},q{_},q{_},q{_}; # 21-25
+      push @new, @bits[26..44]; # 26-44
+
+      print $FINAL join("\t", @new),"\n";
+      next;
+    }
+    else {
+      my @old_brass_II = @{$orig_data{$id}};
+      my @new;
+      push @new, @old_brass_II[0..6]; # 1-7
+      push @new, q{_}; # 8
+      push @new, @old_brass_II[7..10]; # 9-12
+      push @new, @bits[18..25]; # 13-20
+      push @new, @old_brass_II[11..14]; # 21-24
+      push @new, scalar (split /,/, $new[-1]); # 25 # assembled read count
+      push @new, @bits[26..44]; # 26-44
+
+      print $FINAL join("\t", @new),"\n";
+      next;
+    }
+  }
+
+
+  close $FIXED;
+  close $FINAL;
+
+  return;
+}
+
+
+sub svclass {
+  my ($end_a, $end_b) = @{$_[0]};
+  die "Record IDs out of sync at:\n",join("\t",@{$end_a}),"\n",join("\t",@{$end_b}),"\n" if($end_a->[2] !~ m/_[12]$/ || $end_b->[2] !~ m/_[12]$/);
+  my $class;
+  if($end_a->[4] =~ m/^[[:upper:]]\[/) {
+    # ++
+    $class = 'deletion';
+  }
+  elsif($end_a->[4] =~ m/^[[:upper:]]\]/ || $end_a->[4] =~ m/\[[[:upper:]]$/) {
+    # +- / -+
+    $class = 'inversion';
+  }
+  elsif($end_a->[4] =~ m/\][[:upper:]]$/) {
+    # --
+    $class = 'tandem-duplication';
+  }
+  else {
+    die "Unknown rearrangement syntax: $end_a->[4]\n";
+  }
+  $end_a->[7] .= ';SVCLASS='.$class;
+  $end_b->[7] .= ';SVCLASS='.$class;
+  return;
+}
+
diff --git a/perl/docs.tar.gz b/perl/docs.tar.gz
index 0608114..aa3d268 100644
Binary files a/perl/docs.tar.gz and b/perl/docs.tar.gz differ
diff --git a/perl/lib/Bio/Brass.pm b/perl/lib/Bio/Brass.pm
index 7950a4e..aad39b1 100644
--- a/perl/lib/Bio/Brass.pm
+++ b/perl/lib/Bio/Brass.pm
@@ -46,7 +46,7 @@ our @EXPORT = qw(find_breakpoints find_dusty_vertices
 		 is_dusty get_isolated_bp_alignment get_isolated_bp_surrounding_region
 		 $VERSION);
 
-our $VERSION = '2.2.0';
+our $VERSION = '3.0.0';
 
 =head1 NAME
 
diff --git a/perl/lib/Sanger/CGP/Brass/Implement.pm b/perl/lib/Sanger/CGP/Brass/Implement.pm
index 739eb4f..cadd01f 100644
--- a/perl/lib/Sanger/CGP/Brass/Implement.pm
+++ b/perl/lib/Sanger/CGP/Brass/Implement.pm
@@ -248,11 +248,36 @@ sub grass {
   return 1 if PCAP::Threaded::success_exists(File::Spec->catdir($tmp, 'progress'), 0);
 
   my $assembled = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'}.'.assembled.bedpe');
+  my $groups = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'}.'.groups.filtered.bedpe');
   my $merge = sprintf '(cat %s/assemble/bedpe.* | sort -k1,1 -k 2,2n > %s)', $tmp, $assembled;
 
   my $tumour = File::Spec->catfile($tmp, $options->{'safe_tumour_name'}).'.brm.bam';
   my $normal = File::Spec->catfile($tmp, $options->{'safe_normal_name'}).'.brm.bam';
 
+  my $annot_phaseI_prefix = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'}.'_ann.groups.filtered');
+  my $annot_phaseII_prefix = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'}.'_ann.assembled');
+
+  my $final = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'}.'.annot');
+
+  my $combine_cmd = "$^X ";
+  $combine_cmd .= _which('combineResults.pl');
+  $combine_cmd .= ' '.$annot_phaseI_prefix;
+  $combine_cmd .= ' '.$annot_phaseII_prefix;
+  $combine_cmd .= ' '.$final;
+
+  PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'),
+                                            [ $merge,
+                                              _grass($options, $assembled),
+                                              _grass($options, $groups),
+                                              $combine_cmd],
+                                            0);
+
+  PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 0);
+  return 1;
+}
+
+sub _grass {
+  my ($options, $input) = @_;
   my $grass_cmd = "$^X ";
   $grass_cmd .= _which('grass.pl');
   $grass_cmd .= sprintf $GRASS, $options->{'g_cache'},
@@ -263,19 +288,8 @@ sub grass {
                               $options->{'protocol'},
                               $options->{'tumour_name'},
                               $options->{'normal_name'},
-                              $assembled;
-
-  PCAP::Threaded::external_process_handler(File::Spec->catdir($tmp, 'logs'), [$merge, $grass_cmd], 0);
-
-  my $munged_name =File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'}.'_ann.assembled.vcf');
-  my $annotated = File::Spec->catfile($options->{'outdir'}, $options->{'safe_tumour_name'}.'_vs_'.$options->{'safe_normal_name'}.'.annot.vcf');
-  move $munged_name, $annotated || die $!;
-  $munged_name =~ s/vcf$/bedpe/;
-  $annotated =~ s/vcf$/bedpe/;
-  move $munged_name, $annotated || die $!;
-
-  PCAP::Threaded::touch_success(File::Spec->catdir($tmp, 'progress'), 0);
-  return 1;
+                              $input;
+  return $grass_cmd;
 }
 
 sub split_count {
diff --git a/perl/lib/Sanger/CGP/BrassFilter/Cleanup.pm b/perl/lib/Sanger/CGP/BrassFilter/Cleanup.pm
new file mode 100644
index 0000000..239fb1c
--- /dev/null
+++ b/perl/lib/Sanger/CGP/BrassFilter/Cleanup.pm
@@ -0,0 +1,401 @@
+package Sanger::CGP::BrassFilter::Cleanup;
+
+########## LICENCE ##########
+# Copyright (c) 2014,2015 Genome Research Ltd.
+#
+# Author: Cancer Genome Project <cgpit@sanger.ac.uk>
+#
+# This file is part of BRASS.
+#
+# BRASS is free software: you can redistribute it and/or modify it under
+# the terms of the GNU Affero General Public License as published by the Free
+# Software Foundation; either version 3 of the License, or (at your option) any
+# later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+# details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# 1. The usage of a range of years within a copyright statement contained within
+# this distribution should be interpreted as being equivalent to a list of years
+# including the first and last year specified and all consecutive years between
+# them. For example, a copyright statement that reads ‘Copyright (c) 2005, 2007-
+# 2009, 2011-2012’ should be interpreted as being identical to a statement that
+# reads ‘Copyright (c) 2005, 2007, 2008, 2009, 2011, 2012’ and a copyright
+# statement that reads ‘Copyright (c) 2005-2012’ should be interpreted as being
+# identical to a statement that reads ‘Copyright (c) 2005, 2006, 2007, 2008,
+# 2009, 2010, 2011, 2012’."
+########## LICENCE ##########
+
+use strict;
+use autodie qw(:all);
+use warnings FATAL => 'all';
+use List::Util qw(min max sum);
+use Data::Dumper;
+
+use Bio::Brass;
+our $VERSION = Bio::Brass->VERSION;
+
+use Const::Fast qw(const);
+
+const my %DISPATCH => ('chr1' => \&die_if_diff,
+                'start1'  => \&l_min,
+                'end1'    => \&l_max,
+                'start2'  => \&l_min,
+                'chr2' => \&die_if_diff,
+                'end2'    => \&l_max,
+                'id/name'      => \&l_join,
+                'brass_score' => \&l_read_count,
+                'strand1' => \&die_if_diff,
+                'strand2' => \&die_if_diff,
+                'repeats' => \&merge_repeats,
+                'np_sample_count' => \&l_sum,
+                'tumour_count' => \&l_read_count,
+                'normal_count' => \&l_sum,
+                'np_count' => \&l_sum,
+                'bkpt_distance' => \&correct_bp,
+                'sample' => \&uniq_list,
+                'sample_type' => \&uniq_list,
+                'names' => \&uniq_list,
+                'count' => \&sum_counts,
+                'bal_trans' => \&die_if_diff,
+                'inv' => \&l_join,
+                'occL' => \&no_op,
+                'occH' => \&no_op,
+                'copynumber_flag' => \&die_if_diff,
+                'range_blat' => \&l_join,);
+
+sub new {
+  my ($class, %args) = @_;
+  my $self = {};
+  bless $self,$class;
+
+  $self->{'infile'} = $args{-infile} if(defined $args{-infile});
+  $self->{'outfile'} = $args{-outfile} if(defined $args{-outfile});
+
+  return $self;
+}
+
+sub process {
+  my $self = shift;
+  my @headers;
+
+  my $in_file = $self->{'infile'};
+  my $out_file = $self->{'outfile'};
+
+  open my $IN, '<', $in_file;
+  my $last_header;
+  my (%loc_by_name, %name_by_loc);
+  my @header_keys;
+  my %counts;
+  my %end_count;
+  my @pre_clean;
+
+  while(my $line = <$IN>) {
+    if($line =~ m/^#/) {
+      chomp $line;
+      push @headers, $line;
+      next;
+    }
+    unless(defined $last_header) {
+      $last_header = $headers[-1];
+      my @lookups = header_map($last_header);
+      %loc_by_name = %{$lookups[0]};
+      %name_by_loc = %{$lookups[1]};
+      @header_keys = @{$lookups[2]};
+    }
+    chomp $line;
+    $counts{'Groups Total'}++;
+    my %group;
+    my @elements = split /\t/, $line;
+    for my $i(0..((scalar @elements)-1)) {
+      my $name = $name_by_loc{$i};
+      $elements[$i] =~ s/\s+//g if($name eq 'repeats');
+      $group{ $name_by_loc{$i} } = $elements[$i];
+    }
+    $group{'_orig'} = $line;
+    push @pre_clean,  \%group;
+    $end_count{"$group{chr1}:$group{start1}-$group{end1}:$group{strand1}"}++;
+    $end_count{"$group{chr2}:$group{start2}-$group{end2}:$group{strand2}"}++;
+    #push @{$chr_strand_grps{join(':',@elements[0..5],@elements[8..9])}}, $groups[-1];
+  }
+  close $IN;
+
+warn 'loaded: '.$counts{'Groups Total'};
+
+  my $grps_before_clean = scalar @pre_clean;
+  my $end_occ_count = 0;
+  my %chr_strand_grps;
+  while(my $group = shift @pre_clean) {
+    next if($end_count{ "$group->{chr1}:$group->{start1}-$group->{end1}:$group->{strand1}" } > 10);
+    next if($end_count{ "$group->{chr2}:$group->{start2}-$group->{end2}:$group->{strand2}" } > 10);
+    push @{$chr_strand_grps{join(':',$group->{chr1}, $group->{chr2}, $group->{strand1}, $group->{strand2})}}, $group;
+    $end_occ_count++;
+  }
+
+  $counts{'Discard - EndOcc'} = $counts{'Groups Total'} - $end_occ_count;
+
+  #@groups = @{merge_groups(\@groups)};
+  my @groups;
+  for my $key(keys %chr_strand_grps) {
+    warn "Merging groups from set: $key (".(scalar @{$chr_strand_grps{$key}}).")\n";
+    push @groups, @{merge_groups( $chr_strand_grps{$key} )};
+  }
+  $counts{'Discard - merged'} = $end_occ_count - scalar @groups;
+
+  @groups = @{filter_groups(\@groups, \%counts)};
+
+  $counts{'Groups Kept'} = scalar @groups;
+
+  # now fix the occL/H values
+  occX(\@groups);
+
+  my $col_max = (scalar (keys %name_by_loc))-1;
+  open my $OUT, '>', $out_file;
+  print $OUT join("\n", @headers), "\n";
+  for my $group(@groups) {
+    for my $i(0..$col_max) {
+      print $OUT $group->{ $name_by_loc{$i} };
+      print $OUT "\t" if($i != $col_max);
+    }
+    print $OUT "\n";
+  }
+  close $OUT;
+
+  for(sort keys %counts) {
+    warn "$_:\t$counts{$_}\n";
+  }
+}
+
+sub occX {
+  my $groups = shift;
+  for my $this(@{$groups}) {
+    my ($low, $high) = (0,0);
+    for my $that(@{$groups}) {
+      # I count myself too
+      $low++ if(($this->{'start1'}-500) <= $that->{'end1'} && ($this->{'end1'}+500) >= $that->{'start1'});
+      $high++ if(($this->{'start2'}-500) <= $that->{'end2'} && ($this->{'end2'}+500) >= $that->{'start2'});
+    }
+    $this->{'occL'} = $low;
+    $this->{'occH'} = $high;
+  }
+}
+
+sub filter_groups {
+  my ($groups, $counts) = @_;
+  my @new_groups;
+  while(my $group = shift @{$groups}) {
+    if(discard_inv($group) == 1) {
+      $counts->{'Discard - Small Inv'}++;
+      next;
+    }
+    if(discard_smallDel($group) == 1) {
+      $counts->{'Discard - Small del'}++;
+      next;
+    }
+    push @new_groups, $group;
+  }
+  return \@new_groups;
+}
+
+sub merge_groups {
+  my ($groups, $header_keys) = @_;
+  my @new_groups;
+  while (my $this = shift @{$groups}) {
+    # munge the coords for this event if not done in usage
+    unless(exists $this->{'_start1'}) {
+      $this->{'_start1'} = $this->{'start1'}-250;
+      $this->{'_start2'} = $this->{'start2'}-250;
+      $this->{'_end1'} = $this->{'end1'}+250;
+      $this->{'_end2'} = $this->{'end2'}+250;
+    }
+    my $merged = 0;
+    for my $that(@{$groups}) {
+      # will only ever be called once per group as modifying the primary reference
+      unless(exists $that->{'_start1'}) {
+        $that->{'_start1'} = $that->{'start1'}-250;
+        $that->{'_start2'} = $that->{'start2'}-250;
+        $that->{'_end1'} = $that->{'end1'}+250;
+        $that->{'_end2'} = $that->{'end2'}+250;
+      }
+
+      my $a_s1 = $this->{'_start1'};
+      my $a_e1 = $this->{'_end1'};
+      my $a_s2 = $this->{'_start2'};
+      my $a_e2 = $this->{'_end2'};
+
+      my $b_s1 = $that->{'_start1'};
+      my $b_e1 = $that->{'_end1'};
+      my $b_s2 = $that->{'_start2'};
+      my $b_e2 = $that->{'_end2'};
+
+      if(($a_s1 <= $b_e1 && $a_e1 >= $b_s1) && ($a_s2 <= $b_e2 && $a_e2 >= $b_s2)) {
+        merge_event($this, $that, $header_keys);
+        $merged = 1;
+        # clear these as they will now need a different range
+        delete $that->{'_start1'};
+        delete $that->{'_start2'};
+        delete $that->{'_end1'};
+        delete $that->{'_end2'};
+        last; # can only merge once in a loop
+      }
+    }
+
+    push @new_groups, $this unless($merged);
+  }
+  return \@new_groups;
+}
+
+sub discard_smallDel {
+  my $group = shift;
+  # must be same chr, different strand to filter
+  return 0 if($group->{'chr1'} ne $group->{'chr2'});
+  return 0 if($group->{'strand1'} ne '+' || $group->{'strand2'} ne '-');
+  my $dist = $group->{'start2'} - $group->{'end1'};
+  return 1 if($dist < 500);
+  return 0;
+}
+
+sub discard_inv {
+  my $group = shift;
+  # is it an inversion
+  return 0 if(($group->{'chr1'} ne $group->{'chr2'}) || ($group->{'strand1'} ne $group->{'strand2'}));
+
+  my $t_count = $group->{'tumour_count'};
+
+  # some fail fast checks
+  return 1 if($t_count < 4);
+  return 0 if($t_count >= 10);
+
+  my $dist = 1_000_000_000_000; # I just need a big number to start
+
+  for my $g1 ($group->{'start1'}, $group->{'end1'}) {
+    for my $g2 ($group->{'start2'}, $group->{'end2'}) {
+      my $abs_dist = abs $g1 - $g2;
+      $dist = $abs_dist if($abs_dist < $dist);
+    }
+  }
+
+  return 0 if($dist >= 5000 && $t_count >= 4);
+  return 0 if($dist < 5000 && $t_count >= 10);
+  return 1;
+}
+
+sub header_map {
+  my $header_line = shift;
+  $header_line =~ s/^#\s+//;
+  my @elements = split /\t/, $header_line;
+  my (%loc_by_name, %name_by_loc);
+  my @header_keys;
+  my @to_end;
+  my $count = 0;
+  for my $e(@elements) {
+    if($e =~ m/_count$/ || $e eq 'brass_score') {
+      unshift @to_end, $e;
+    } elsif($e eq 'count' || $e eq 'bkpt_distance') {
+      # have to be done last
+      push @to_end, $e;
+    } else {
+      push @header_keys, $e;
+    }
+    $loc_by_name{$e} = $count;
+    $name_by_loc{$count} = $e;
+    $count++;
+  }
+  push @header_keys, @to_end;
+  return (\%loc_by_name, \%name_by_loc, \@header_keys);
+}
+
+sub uniq_list {
+  my ($key, $a, $b) = @_;
+  my %d = map {$_ => 1} (split /,/, $a->{$key}),(split /,/, $b->{$key});
+  return (join ',', keys %d);
+}
+
+sub l_min {
+  my ($key, $a, $b) = @_;
+  return min($a->{$key}, $b->{$key});
+}
+
+sub l_max {
+  my ($key, $a, $b) = @_;
+  return max($a->{$key}, $b->{$key});
+}
+
+sub l_sum {
+  my ($key, $a, $b) = @_;
+  return sum($a->{$key}, $b->{$key});
+}
+
+sub l_read_count {
+  my ($key, $a, $b) = @_;
+  my %d = map {$_ => 1} (split /,/, $a->{'names'}),(split /,/, $b->{'names'});
+  my @keys = keys %d;
+  return scalar @keys;
+}
+
+sub l_join {
+  my ($key, $a, $b) = @_;
+  return q{} if($a->{$key} eq q{} && $b->{$key} eq q{});
+  return $a->{$key} if($a->{$key} ne q{} && $b->{$key} eq q{});
+  return $b->{$key} if($a->{$key} eq q{} && $b->{$key} ne q{});
+  return $a->{$key}.','.$b->{$key};
+}
+
+sub sum_counts {
+  my ($key, $a, $b) = @_;
+  # ignore $key
+  my $sum = 0;
+  for(keys %{$b}) {
+    next if($_ !~ m/_count$/);
+    $sum += $b->{$_};
+  }
+  return $sum;
+}
+
+sub correct_bp {
+  my ($key, $a, $b) = @_;
+  if($a->{$key} == -1 || $b->{$key} == -1) {
+    return -1 if($a->{$key} == $b->{$key});
+    die "\n'$key' value combination is impossible for data:\n$a->{_orig}\n$b->{_orig}\n";
+  }
+  return $b->{'start2'} - $b->{'start1'};
+}
+
+sub die_if_diff {
+  my ($key, $a, $b) = @_;
+  if($a->{$key} ne $b->{$key}) {
+    warn "$key, $a->{$key}, $b->{$key}\n";
+    warn Dumper($a, $b);
+    die "No case provided to handle this difference.\n";
+  }
+}
+
+sub no_op {
+  my ($key, $a, $b) = @_;
+  return $b->{$key};
+}
+
+sub merge_repeats {
+  my ($key, $a, $b) = @_;
+  # don't really need the key but use it anyway
+  return q{} if($a->{$key} eq q{} && $b->{$key} eq q{});
+  return $a->{$key} if($a->{$key} ne q{} && $b->{$key} eq q{});
+  return $b->{$key} if($a->{$key} eq q{} && $b->{$key} ne q{});
+  return $a->{$key}.','.$b->{$key};
+}
+
+sub merge_event {
+  my ($this, $that, $header_keys) = @_;
+  for my $key(@{$header_keys}) {
+    $that->{$key} = $DISPATCH{$key}->($key, $this, $that);
+  }
+  return 1;
+}
+
+
+1;
diff --git a/setup.sh b/setup.sh
index 3fca066..361ab77 100755
--- a/setup.sh
+++ b/setup.sh
@@ -133,6 +133,12 @@ if [[ "x$CHK" == "x" ]] ; then
   exit 1;
 fi
 
+CHK=`perl -le 'eval "require $ARGV[0]" and print $ARGV[0]->VERSION' Sanger::CGP::Grass`
+if [[ "x$CHK" == "x" ]] ; then
+  echo "PREREQUISITE: Please install grass before proceeding: https://github.com/cancerit/grass/releases"
+  exit 1;
+fi
+
 #create a location to build dependencies
 SETUP_DIR=$INIT_DIR/install_tmp
 mkdir -p $SETUP_DIR