|
6 | 6 | import htsjdk.variant.variantcontext.VariantContextBuilder;
|
7 | 7 | import htsjdk.variant.variantcontext.writer.VariantContextWriter;
|
8 | 8 | import htsjdk.variant.vcf.VCFHeader;
|
| 9 | +import htsjdk.variant.vcf.VCFHeaderLineCount; |
9 | 10 | import htsjdk.variant.vcf.VCFHeaderLineType;
|
10 | 11 | import htsjdk.variant.vcf.VCFInfoHeaderLine;
|
11 | 12 | import org.broadinstitute.barclay.argparser.Argument;
|
|
40 | 41 | * <li>Reference track overlap</li>
|
41 | 42 | * </ul>
|
42 | 43 | * Records are annotated with their respective strata names in the {@link GATKSVVCFConstants#STRATUM_INFO_KEY} INFO
|
43 |
| - * field. Users must provide a stratification configuration .tsv file (tab-delimited table) with the following column |
| 44 | + * field. SVs that do not match any of the groups will be annotated with the {@link SVStratify#DEFAULT_STRATUM} group. |
| 45 | + * Users must provide a stratification configuration .tsv file (tab-delimited table) with the following column |
44 | 46 | * header on the first line:
|
45 | 47 | * <ol>
|
46 | 48 | * <li>NAME</li>
|
|
78 | 80 | * {@link SVStratificationEngineArgumentsCollection#trackNameList} parameters. For example,
|
79 | 81 | * </p>
|
80 | 82 | * <pre>
|
81 |
| - * gatk GroupedSVCluster \ |
| 83 | + * gatk SVStratify \ |
82 | 84 | * --track-name RM \
|
83 | 85 | * --track-intervals repeatmasker.bed \
|
84 | 86 | * --track-name SD \
|
|
104 | 106 | * overlap is only defined by {@link SVStratificationEngineArgumentsCollection#numBreakpointOverlapsInterchrom}.
|
105 | 107 | * </p>
|
106 | 108 | *
|
107 |
| - * <p>By default, each stratification group must be mutually exclusive, meaning that any given SV can only belong to |
| 109 | + * <p>If using the --split-output option, then each stratification group must be mutually exclusive, meaning that any given SV can only belong to |
108 | 110 | * one group. An error is thrown if the tool encounters a variant that meets the criteria for more than one group.
|
109 |
| - * This restriction can be overridden with the {@link SVStratify#ALLOW_MULTIPLE_MATCHES_LONG_NAME} argument, in which |
110 |
| - * case the record will be written out multiple times: once for each matching stratification group with the corresponding |
111 |
| - * {@link GATKSVVCFConstants#STRATUM_INFO_KEY} value. Furthermore, SVs that do not match any of the groups will be |
112 |
| - * annotated with the {@link SVStratify#DEFAULT_STRATUM} group.</p> |
| 111 | + * This restriction can be overridden with the {@link SVStratify#ALLOW_MULTIPLE_MATCHES_LONG_NAME} argument, in which case |
| 112 | + * records belonging to multiple stratification groups will be written to each corresponding file (hence possibly |
| 113 | + * resulting in duplicated records).</p> |
113 | 114 | *
|
114 | 115 | * <p>If using {@link #SPLIT_OUTPUT_LONG_NAME}, then the tool generates a set of VCFs as output, with each VCF containing
|
115 | 116 | * the records of each group.</p>
|
@@ -242,7 +243,7 @@ protected void createGroupWriter(final String name, final Path path) {
|
242 | 243 | }
|
243 | 244 |
|
244 | 245 | public static void addStratifyMetadata(final VCFHeader header) {
|
245 |
| - header.addMetaDataLine(new VCFInfoHeaderLine(GATKSVVCFConstants.STRATUM_INFO_KEY, 1, |
| 246 | + header.addMetaDataLine(new VCFInfoHeaderLine(GATKSVVCFConstants.STRATUM_INFO_KEY, VCFHeaderLineCount.UNBOUNDED, |
246 | 247 | VCFHeaderLineType.String, "Stratum ID"));
|
247 | 248 | }
|
248 | 249 |
|
@@ -322,16 +323,23 @@ public void apply(final VariantContext variant, final ReadsContext readsContext,
|
322 | 323 | if (stratifications.isEmpty()) {
|
323 | 324 | writers.get(DEFAULT_STRATUM).add(builder.attribute(GATKSVVCFConstants.STRATUM_INFO_KEY, DEFAULT_STRATUM).make());
|
324 | 325 | } else {
|
325 |
| - if (!allowMultipleMatches && stratifications.size() > 1) { |
326 |
| - final String matchesString = String.join(", ", stratifications.stream().map(SVStratificationEngine.Stratum::getName).collect(Collectors.toList())); |
327 |
| - throw new GATKException("Record " + record.getId() + " matched multiple groups: " + matchesString + ". Bypass this error using the --" + ALLOW_MULTIPLE_MATCHES_LONG_NAME + " argument"); |
328 |
| - } |
329 |
| - for (final SVStratificationEngine.Stratum stratum : stratifications) { |
330 |
| - final VariantContextWriter writer = splitOutput ? writers.get(stratum.getName()) : writers.get(DEFAULT_STRATUM); |
331 |
| - if (writer == null) { |
332 |
| - throw new GATKException("Writer not found for group: " + stratum.getName()); |
| 326 | + final List<String> stratumNames = new ArrayList<>(stratifications).stream().map(SVStratificationEngine.Stratum::getName).sorted().collect(Collectors.toUnmodifiableList()); |
| 327 | + final VariantContext outputVariant = builder.attribute(GATKSVVCFConstants.STRATUM_INFO_KEY, stratumNames).make(); |
| 328 | + if (splitOutput) { |
| 329 | + if (!allowMultipleMatches && stratifications.size() > 1) { |
| 330 | + final String matchesString = String.join(", ", stratumNames); |
| 331 | + throw new GATKException("Record " + record.getId() + " matched multiple groups: " + matchesString + ". Bypass this error using the --" + ALLOW_MULTIPLE_MATCHES_LONG_NAME + " argument"); |
| 332 | + } |
| 333 | + for (final SVStratificationEngine.Stratum stratum : stratifications) { |
| 334 | + final VariantContextWriter writer = writers.get(stratum.getName()); |
| 335 | + if (writer == null) { |
| 336 | + throw new GATKException("Writer not found for group: " + stratum.getName()); |
| 337 | + } |
| 338 | + writer.add(outputVariant); |
333 | 339 | }
|
334 |
| - writer.add(builder.attribute(GATKSVVCFConstants.STRATUM_INFO_KEY, stratum.getName()).make()); |
| 340 | + } else { |
| 341 | + final VariantContextWriter writer = writers.get(DEFAULT_STRATUM); |
| 342 | + writer.add(outputVariant); |
335 | 343 | }
|
336 | 344 | }
|
337 | 345 | }
|
|
0 commit comments