Skip to content

Commit

Permalink
Added a '--prefer-mane-transcripts' mode that enforces MANE_Select ta…
Browse files Browse the repository at this point in the history
…gged Gencode transcripts where possible (#9012)

* Added a '--prefer-mane-transcripts' mode that enforces MANE_Select tagged Gencode transcripts where possible
  • Loading branch information
jamesemery authored Oct 23, 2024
1 parent d056c32 commit c4860d4
Show file tree
Hide file tree
Showing 29 changed files with 297 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ public abstract class BaseFuncotatorArgumentCollection implements Serializable {
)
public TranscriptSelectionMode transcriptSelectionMode = FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE;

/**
 * Whether MANE-tagged Gencode transcripts should be preferred over the default transcript set.
 * Exposed on the command line under {@link FuncotatorArgumentDefinitions#PREFER_MANE_TRANSCRIPT_MODE};
 * defaults to {@code false}.
 */
@Advanced
@Argument(
        fullName = FuncotatorArgumentDefinitions.PREFER_MANE_TRANSCRIPT_MODE,
        optional = true,
        doc = "If this flag is set, Funcotator will prefer 'MANE_Plus_Clinical' followed by 'MANE_Select' transcripts (including those not tagged 'basic') if one is present for a given variant. If neither tag is present, it uses the default behavior (only 'basic' transcripts)."
)
public boolean MANETranscriptMode = false;

@Argument(
fullName = FuncotatorArgumentDefinitions.TRANSCRIPT_LIST_LONG_NAME,
optional = true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ public void onTraversalStart() {
new FlankSettings(0,0),
true,
funcotatorArgs.minNumBasesForValidSegment,
funcotatorArgs.spliceSiteWindow
funcotatorArgs.spliceSiteWindow,
funcotatorArgs.MANETranscriptMode
).stream()
.filter(DataSourceFuncotationFactory::isSupportingSegmentFuncotation)
.collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -794,7 +794,8 @@ public void onTraversalStart() {
new FlankSettings(funcotatorArgs.fivePrimeFlankSize, funcotatorArgs.threePrimeFlankSize),
false,
funcotatorArgs.minNumBasesForValidSegment,
funcotatorArgs.spliceSiteWindow
funcotatorArgs.spliceSiteWindow,
funcotatorArgs.MANETranscriptMode
);

logger.info("Initializing Funcotator Engine...");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ public class FuncotatorArgumentDefinitions {
public static final String TRANSCRIPT_SELECTION_MODE_LONG_NAME = "transcript-selection-mode";
public static final TranscriptSelectionMode TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE = TranscriptSelectionMode.CANONICAL;

/** Full (long) name of the boolean command-line argument that makes Funcotator prefer MANE-tagged transcripts. */
public static final String PREFER_MANE_TRANSCRIPT_MODE = "prefer-mane-transcripts";

/**
* Do not give this a static default value or the integration tests will get hosed.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ private static boolean isValidDirectory(final Path p) {
* ignored for those that don't.
* @param minBasesForValidSegment The minimum number of bases for a segment to be considered valid.
* @param spliceSiteWindowSize The number of bases on either side of a splice site for a variant to be a {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE} variant.
* @param preferMANETranscriptsWhereApplicable If this is set, {@link GencodeFuncotationFactory} will only emit MANE transcripts if any are available for a given variant; otherwise it behaves as normal.
* @return A {@link List} of {@link DataSourceFuncotationFactory} given the data source metadata, overrides, and transcript reporting priority information.
*/
public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFactoriesForDataSources(final Map<Path, Properties> dataSourceMetaData,
Expand All @@ -340,7 +341,8 @@ public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFact
final FlankSettings flankSettings,
final boolean doAttemptSegmentFuncotationForTranscriptDatasources,
final int minBasesForValidSegment,
final int spliceSiteWindowSize) {
final int spliceSiteWindowSize,
final boolean preferMANETranscriptsWhereApplicable) {
Utils.nonNull(dataSourceMetaData);
Utils.nonNull(annotationOverridesMap);
Utils.nonNull(transcriptSelectionMode);
Expand Down Expand Up @@ -379,7 +381,7 @@ public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFact
case GENCODE:
featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, GencodeGtfFeature.class, false);
funcotationFactory = DataSourceUtils.createGencodeDataSource(path, properties, annotationOverridesMap, transcriptSelectionMode,
userTranscriptIdSet, featureInput, flankSettings, doAttemptSegmentFuncotationForTranscriptDatasources, minBasesForValidSegment, spliceSiteWindowSize);
userTranscriptIdSet, featureInput, flankSettings, doAttemptSegmentFuncotationForTranscriptDatasources, minBasesForValidSegment, spliceSiteWindowSize, preferMANETranscriptsWhereApplicable);
break;
case VCF:
featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, VariantContext.class, false);
Expand Down Expand Up @@ -596,7 +598,8 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data
final FlankSettings flankSettings,
final boolean isSegmentFuncotationEnabled,
final int minBasesForValidSegment,
final int spliceSiteWindowSize) {
final int spliceSiteWindowSize,
final boolean onlyUseMANETranscriptsWhenApplicable) {
Utils.nonNull(dataSourceFile);
Utils.nonNull(dataSourceProperties);
Utils.nonNull(annotationOverridesMap);
Expand Down Expand Up @@ -626,7 +629,8 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data
ncbiBuildVersion,
isSegmentFuncotationEnabled,
minBasesForValidSegment,
spliceSiteWindowSize
spliceSiteWindowSize,
onlyUseMANETranscriptsWhenApplicable
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,11 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory {
*/
private boolean isSegmentFuncotationEnabled;

/**
 * If this is true, only MANE transcripts will be used for funcotation creation when at least one is present.
 * The preference is only consulted for features whose GTF source file type is Gencode: transcripts tagged
 * MANE_Plus_Clinical are tried first, then MANE_Select, and if neither is present only `basic` transcripts are used.
 */
private boolean preferMANETranscripts;

//==================================================================================================================
// Constructors:

Expand Down Expand Up @@ -354,7 +359,7 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,

this(gencodeTranscriptFastaFilePath, version, name, transcriptSelectionMode, userRequestedTranscripts,
annotationOverrides, mainFeatureInput, flankSettings, isDataSourceB37, ncbiBuildVersion,
isSegmentFuncotationEnabled, minBasesForValidSegment, FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE);
isSegmentFuncotationEnabled, minBasesForValidSegment, FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE, false);
}

/**
Expand Down Expand Up @@ -385,7 +390,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,
final String ncbiBuildVersion,
final boolean isSegmentFuncotationEnabled,
final int minBasesForValidSegment,
final int spliceSiteWindowSize) {
final int spliceSiteWindowSize,
final boolean preferMANETranscriptsWhereApplicable) {

super(mainFeatureInput, minBasesForValidSegment);

Expand Down Expand Up @@ -429,6 +435,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,

// Initialize overrides / defaults:
initializeAnnotationOverrides( annotationOverrides );

this.preferMANETranscripts = preferMANETranscriptsWhereApplicable;
}

private Path localizeGencodeTranscriptFastaFile( final Path gencodeTranscriptFastaFilePath ) {
Expand Down Expand Up @@ -622,6 +630,28 @@ private static List<GencodeGtfGeneFeature> convertFeaturesToGencodeGtfGeneFeatur
.collect(Collectors.toList());
}

/**
 * Picks the transcripts to funcotate when MANE transcripts are preferred.
 *
 * The choice is tiered, and the first tier that matches at least one transcript wins:
 * <ol>
 *     <li>transcripts tagged {@code MANE_PLUS_CLINICAL};</li>
 *     <li>transcripts tagged {@code MANE_SELECT};</li>
 *     <li>transcripts tagged `basic`.</li>
 * </ol>
 *
 * @param transcripts {@link List} of Gencode transcripts to possibly filter.
 * @return The transcripts of the first matching tier.  If no transcript carries either MANE tag, the
 *         `basic` transcripts are returned, which may be an empty {@link List}.
 */
@VisibleForTesting
static List<GencodeGtfTranscriptFeature> retreiveMANESelectModeTranscriptsCriteria(final List<GencodeGtfTranscriptFeature> transcripts) {
    // Tier 1: MANE_Plus_Clinical takes precedence over everything else.
    final List<GencodeGtfTranscriptFeature> clinicalTagged = transcripts.stream()
            .filter(transcript -> hasTag(transcript, MANE_PLUS_CLINICAL))
            .toList();
    if (!clinicalTagged.isEmpty()) {
        return clinicalTagged;
    }

    // Tier 2: MANE_Select.
    final List<GencodeGtfTranscriptFeature> selectTagged = transcripts.stream()
            .filter(transcript -> hasTag(transcript, MANE_SELECT))
            .toList();
    if (selectTagged.isEmpty()) {
        // Tier 3: no MANE transcript at all, so fall back to the default `basic`-only behavior.
        return transcripts.stream()
                .filter(transcript -> isBasic(transcript))
                .collect(Collectors.toList());
    }

    return selectTagged;
}

/**
* {@inheritDoc}
Expand Down Expand Up @@ -853,16 +883,21 @@ static boolean isVariantInCodingRegion(final GencodeFuncotation.VariantClassific
*/
private List<GencodeFuncotation> createFuncotationsHelper(final VariantContext variant, final Allele altAllele, final GencodeGtfGeneFeature gtfFeature, final ReferenceContext reference) {

final List<GencodeGtfTranscriptFeature> transcriptList;
List<GencodeGtfTranscriptFeature> transcriptList;

// Only get basic transcripts if we're using data from Gencode:
if ( gtfFeature.getGtfSourceFileType().equals(GencodeGtfCodec.GTF_FILE_TYPE_STRING) ) {
transcriptList = retrieveBasicTranscripts(gtfFeature);
}
else {
if (preferMANETranscripts) {
// Filter out the non-MANE_Select/Mane_Plus_Clinical transcripts if we're only using MANE transcripts:
transcriptList = retreiveMANESelectModeTranscriptsCriteria(gtfFeature.getTranscripts());
} else {
transcriptList = retrieveBasicTranscripts(gtfFeature);
}
} else {
transcriptList = gtfFeature.getTranscripts();
}


return createFuncotationsHelper(variant, altAllele, reference, transcriptList);
}

Expand Down Expand Up @@ -979,9 +1014,14 @@ static final GencodeFuncotation createDefaultFuncotationsOnProblemVariant( final

/**
 * Tells whether the given transcript carries the `basic` tag.
 *
 * @param transcript The {@link GencodeGtfTranscriptFeature} to inspect.
 * @return {@code true} if the transcript has the `basic` tag; {@code false} otherwise.
 */
private static boolean isBasic(final GencodeGtfTranscriptFeature transcript) {
    final GencodeGTFFieldConstants.FeatureTag basicTag = GencodeGTFFieldConstants.FeatureTag.BASIC;
    return hasTag(transcript, basicTag);
}

/**
 * Checks whether the given transcript carries the given tag.
 *
 * A transcript matches when at least one of its optional fields is named {@code "tag"} and that
 * field's value equals the string form of {@code tag}.
 *
 * @param transcript The {@link GencodeGtfTranscriptFeature} whose optional fields are inspected.
 * @param tag The {@link GencodeGTFFieldConstants.FeatureTag} to look for.
 * @return {@code true} if the transcript has the given tag; {@code false} otherwise.
 */
private static boolean hasTag(final GencodeGtfTranscriptFeature transcript, final GencodeGTFFieldConstants.FeatureTag tag) {
    // Compare only against the requested tag.  A leftover hard-coded `basic` comparison here would
    // make every non-basic tag (e.g. the MANE tags) impossible to match.
    return transcript.getOptionalFields().stream()
            .anyMatch( f -> f.getName().equals("tag") && f.getValue().equals(tag.toString()) );
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ public void testGetFuncotationFactoriesAndCreateFuncotationMapForVariant(final F
new FlankSettings(0, 0),
false,
FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT,
FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE)
FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE,
false)
);

for (int i = 0; i < entireVcf.getRight().size(); i++) {
Expand Down
Loading

0 comments on commit c4860d4

Please sign in to comment.