diff --git a/requirements.txt b/requirements.txt index 41f6656..b631737 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,4 @@ pysam==0.21.0 numpy==1.24.3 pandas==2.0.1 pytest +truvari \ No newline at end of file diff --git a/scripts/Spike/2b_SV.sh b/scripts/Spike/2b_SV.sh index b7527bd..7f88676 100644 --- a/scripts/Spike/2b_SV.sh +++ b/scripts/Spike/2b_SV.sh @@ -8,15 +8,15 @@ READ_LENGTH=$1 VAF=$2 -VCF_A=$3 -VCF_B=$4 -MOD_BAM=$5 -OUTPUT_VCF=$6 -REFERENCE=$7 -OUTPUT_PFX=$8 - -echo "VCF_A: $VCF_A" -echo "VCF_B: $VCF_B" +#$VCF_A=$3 +#VCF_B=$4 +MERGED_VCF=$3 +MOD_BAM=$4 +OUTPUT_VCF=$5 +REFERENCE=$6 +OUTPUT_PFX=$7 + +echo "MERGED_VCF: $MERGED_VCF" echo "MOD_BAM: $MOD_BAM" if [ "$READ_LENGTH" = "short" ]; then @@ -28,11 +28,7 @@ if [ "$READ_LENGTH" = "short" ]; then elif [ "$READ_LENGTH" = "long" ]; then echo "Input is 'long' starting Sniffles2" - sniffles --input $MOD_BAM --genotype-vcf $VCF_A --sample-id mosaicSim --vcf "${OUTPUT_VCF}.vcf1.vcf" - sniffles --input $MOD_BAM --genotype-vcf $VCF_B --sample-id mosaicSim --vcf "${OUTPUT_VCF}.vcf2.vcf" - - # Merge the two VCF files with bcftools - bcftools merge --force-samples -O z --write-index -o $OUTPUT_VCF $VCF_A $VCF_B + sniffles --input $MOD_BAM --genotype-vcf $MERGED_VCF --sample-id mosaicSim --vcf "${OUTPUT_VCF}" else echo "Error: Invalid input" diff --git a/scripts/Spike/2b_re-genotyping_main.sh b/scripts/Spike/2b_re-genotyping_main.sh index e0a2bfa..c8dad1a 100644 --- a/scripts/Spike/2b_re-genotyping_main.sh +++ b/scripts/Spike/2b_re-genotyping_main.sh @@ -20,7 +20,9 @@ MOD_BAM=$5 OUTPUT_DIR=$6 READ_LENGTH=$7 MERGED_VCF="${OUTPUT_DIR}/merged.vcf.gz" -OUTPUT_VCF="${OUTPUT_DIR}/output_genotypes.vcf" +COLLAPSED_MERGED_VCF="${OUTPUT_DIR}/collapsed_merged.vcf.gz" +COLLAPSED_VARIANTS_VCF="${OUTPUT_DIR}/collapsed_variants.vcf.gz" +OUTPUT_VCF="${OUTPUT_DIR}/output_regenotyped.vcf.gz" OUTPUT_VCF_FILTERED="${OUTPUT_DIR}/output_genotypes_filtered.vcf" REFERENCE=${8} @@ -28,13 +30,15 @@ mkdir -p $OUTPUT_DIR # Merge the two VCF files with bcf tools bcftools merge --force-samples -O z --write-index -o $MERGED_VCF $VCF_A $VCF_B +# Collapse variants with truvari +truvari collapse -i $MERGED_VCF -o $COLLAPSED_MERGED_VCF -c $COLLAPSED_VARIANTS_VCF if [ "$VARIANT" = "SNV" ]; then echo "Input is 'SNV'" - ./2b_SNV.sh $READ_LENGTH $VAF $MERGED_VCF $MOD_BAM $OUTPUT_VCF $REFERENCE + ./2b_SNV.sh $READ_LENGTH $VAF $COLLAPSED_MERGED_VCF $MOD_BAM $OUTPUT_VCF $REFERENCE elif [ "$VARIANT" = "SV" ]; then echo "Input is 'SV'" - ./2b_SV2.sh $READ_LENGTH $VAF $VCF_A $VCF_B $OUTPUT_VCF "${OUTPUT_DIR}/output_genotypes" $REFERENCE "Spike_out" + ./2b_SV2.sh $READ_LENGTH $VAF $COLLAPSED_MERGED_VCF $OUTPUT_VCF "${OUTPUT_DIR}/output_genotypes" $REFERENCE "Spike_out" else echo "Error: Invalid input" fi