Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added a '--prefer-mane-transcripts' mode that enforces MANE_Select tagged Gencode transcripts where possible #9012

Merged
merged 3 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ public abstract class BaseFuncotatorArgumentCollection implements Serializable {
)
public TranscriptSelectionMode transcriptSelectionMode = FuncotatorArgumentDefinitions.TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE;

@Advanced
@Argument(
fullName = FuncotatorArgumentDefinitions.PREFER_MANE_TRANSCRIPT_MODE,
optional = true,
doc = "If this flag is set, Funcotator will only consider 'MANE_Plus_Clinical' followed by 'MANE_Select' transcripts if one is present for a given variant. If neither tag is present it uses the default behavior."
jonn-smith marked this conversation as resolved.
Show resolved Hide resolved
)
public boolean MANETranscriptMode = false;

@Argument(
fullName = FuncotatorArgumentDefinitions.TRANSCRIPT_LIST_LONG_NAME,
optional = true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ public void onTraversalStart() {
new FlankSettings(0,0),
true,
funcotatorArgs.minNumBasesForValidSegment,
funcotatorArgs.spliceSiteWindow
funcotatorArgs.spliceSiteWindow,
funcotatorArgs.MANETranscriptMode
).stream()
.filter(DataSourceFuncotationFactory::isSupportingSegmentFuncotation)
.collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -794,7 +794,8 @@ public void onTraversalStart() {
new FlankSettings(funcotatorArgs.fivePrimeFlankSize, funcotatorArgs.threePrimeFlankSize),
false,
funcotatorArgs.minNumBasesForValidSegment,
funcotatorArgs.spliceSiteWindow
funcotatorArgs.spliceSiteWindow,
funcotatorArgs.MANETranscriptMode
);

logger.info("Initializing Funcotator Engine...");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ public class FuncotatorArgumentDefinitions {
public static final String TRANSCRIPT_SELECTION_MODE_LONG_NAME = "transcript-selection-mode";
public static final TranscriptSelectionMode TRANSCRIPT_SELECTION_MODE_DEFAULT_VALUE = TranscriptSelectionMode.CANONICAL;

public static final String PREFER_MANE_TRANSCRIPT_MODE = "prefer-mane-transcripts";

/**
* Do not give this a static default value or the integration tests will get hosed.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ private static boolean isValidDirectory(final Path p) {
* ignored for those that don't.
* @param minBasesForValidSegment The minimum number of bases for a segment to be considered valid.
* @param spliceSiteWindowSize The number of bases on either side of a splice site for a variant to be a {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#SPLICE_SITE} variant.
* @param preferMANETranscriptsWhereApplicable If this is set, in GencodeFuncotationFactory, we will only emit MANE transcripts if any are available for a given variant; otherwise it behaves as normal.
jamesemery marked this conversation as resolved.
Show resolved Hide resolved
* @return A {@link List} of {@link DataSourceFuncotationFactory} given the data source metadata, overrides, and transcript reporting priority information.
*/
public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFactoriesForDataSources(final Map<Path, Properties> dataSourceMetaData,
Expand All @@ -340,7 +341,8 @@ public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFact
final FlankSettings flankSettings,
final boolean doAttemptSegmentFuncotationForTranscriptDatasources,
final int minBasesForValidSegment,
final int spliceSiteWindowSize) {
final int spliceSiteWindowSize,
final boolean preferMANETranscriptsWhereApplicable) {
Utils.nonNull(dataSourceMetaData);
Utils.nonNull(annotationOverridesMap);
Utils.nonNull(transcriptSelectionMode);
Expand Down Expand Up @@ -379,7 +381,7 @@ public static List<DataSourceFuncotationFactory> createDataSourceFuncotationFact
case GENCODE:
featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, GencodeGtfFeature.class, false);
funcotationFactory = DataSourceUtils.createGencodeDataSource(path, properties, annotationOverridesMap, transcriptSelectionMode,
userTranscriptIdSet, featureInput, flankSettings, doAttemptSegmentFuncotationForTranscriptDatasources, minBasesForValidSegment, spliceSiteWindowSize);
userTranscriptIdSet, featureInput, flankSettings, doAttemptSegmentFuncotationForTranscriptDatasources, minBasesForValidSegment, spliceSiteWindowSize, preferMANETranscriptsWhereApplicable);
break;
case VCF:
featureInput = createAndRegisterFeatureInputs(path, properties, gatkToolInstance, lookaheadFeatureCachingInBp, VariantContext.class, false);
Expand Down Expand Up @@ -596,7 +598,8 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data
final FlankSettings flankSettings,
final boolean isSegmentFuncotationEnabled,
final int minBasesForValidSegment,
final int spliceSiteWindowSize) {
final int spliceSiteWindowSize,
final boolean onlyUseMANETranscriptsWhenApplicable) {
Utils.nonNull(dataSourceFile);
Utils.nonNull(dataSourceProperties);
Utils.nonNull(annotationOverridesMap);
Expand Down Expand Up @@ -626,7 +629,8 @@ private static GencodeFuncotationFactory createGencodeDataSource(final Path data
ncbiBuildVersion,
isSegmentFuncotationEnabled,
minBasesForValidSegment,
spliceSiteWindowSize
spliceSiteWindowSize,
onlyUseMANETranscriptsWhenApplicable
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,11 @@ public class GencodeFuncotationFactory extends DataSourceFuncotationFactory {
*/
private boolean isSegmentFuncotationEnabled;

/**
* If this is true, only MANE transcripts will be used for funcotation creation when at least one is present.
*/
private boolean onlyUseMANETranscripts;
jamesemery marked this conversation as resolved.
Show resolved Hide resolved

//==================================================================================================================
// Constructors:

Expand Down Expand Up @@ -354,7 +359,7 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,

this(gencodeTranscriptFastaFilePath, version, name, transcriptSelectionMode, userRequestedTranscripts,
annotationOverrides, mainFeatureInput, flankSettings, isDataSourceB37, ncbiBuildVersion,
isSegmentFuncotationEnabled, minBasesForValidSegment, FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE);
isSegmentFuncotationEnabled, minBasesForValidSegment, FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE, false);
}

/**
Expand Down Expand Up @@ -385,7 +390,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,
final String ncbiBuildVersion,
final boolean isSegmentFuncotationEnabled,
final int minBasesForValidSegment,
final int spliceSiteWindowSize) {
final int spliceSiteWindowSize,
final boolean preferMANETranscriptsWhereApplicable) {

super(mainFeatureInput, minBasesForValidSegment);

Expand Down Expand Up @@ -429,6 +435,8 @@ public GencodeFuncotationFactory(final Path gencodeTranscriptFastaFilePath,

// Initialize overrides / defaults:
initializeAnnotationOverrides( annotationOverrides );

this.onlyUseMANETranscripts = preferMANETranscriptsWhereApplicable;
jamesemery marked this conversation as resolved.
Show resolved Hide resolved
}

private Path localizeGencodeTranscriptFastaFile( final Path gencodeTranscriptFastaFilePath ) {
Expand Down Expand Up @@ -622,6 +630,27 @@ private static List<GencodeGtfGeneFeature> convertFeaturesToGencodeGtfGeneFeatur
.collect(Collectors.toList());
}

/**
* If MANE_Plus_Clinical transcripts are avalible, only return them, followed by MANE_Select transcripts, followed by all transcripts.
* @param transcripts of gencode transcripts to possibly filter
* @return
*/
private List<GencodeGtfTranscriptFeature> filterToMANETranscripts(final List<GencodeGtfTranscriptFeature> transcripts) {
jamesemery marked this conversation as resolved.
Show resolved Hide resolved
final List<GencodeGtfTranscriptFeature> plusClincal = transcripts.stream()
.filter(g -> hasTag(g, MANE_PLUS_CLINICAL)).toList();
if (plusClincal.size() > 0) {
return plusClincal;
}

final List<GencodeGtfTranscriptFeature> maneSelectTranscripts = transcripts.stream()
.filter(g -> hasTag(g, MANE_SELECT)).toList();

if (maneSelectTranscripts.size() > 0) {
return maneSelectTranscripts;
}

return transcripts;
}

/**
* {@inheritDoc}
Expand Down Expand Up @@ -853,7 +882,7 @@ static boolean isVariantInCodingRegion(final GencodeFuncotation.VariantClassific
*/
private List<GencodeFuncotation> createFuncotationsHelper(final VariantContext variant, final Allele altAllele, final GencodeGtfGeneFeature gtfFeature, final ReferenceContext reference) {

final List<GencodeGtfTranscriptFeature> transcriptList;
List<GencodeGtfTranscriptFeature> transcriptList;

// Only get basic transcripts if we're using data from Gencode:
if ( gtfFeature.getGtfSourceFileType().equals(GencodeGtfCodec.GTF_FILE_TYPE_STRING) ) {
Expand All @@ -863,6 +892,11 @@ private List<GencodeFuncotation> createFuncotationsHelper(final VariantContext v
transcriptList = gtfFeature.getTranscripts();
}

// Filter out the non-MANE_Select/MANE_Plus_Clinical transcripts if we're only using MANE transcripts:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you rename the input variable in the following method to reflect that the transcripts no longer need to be only basic?

private List<GencodeFuncotation> createFuncotationsHelper(final VariantContext variant, final Allele altAllele, final ReferenceContext reference, final List<GencodeGtfTranscriptFeature> basicTranscripts)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is not accurate, though: the transcripts still have to be basic. This simply applies a layer on top of the basic transcript filtering that enforces that they are MANE Select as well (which, from my cursory survey of GTF files, seem to travel together anyway).

if (onlyUseMANETranscripts) {
transcriptList = filterToMANETranscripts(transcriptList);
}

return createFuncotationsHelper(variant, altAllele, reference, transcriptList);
}

Expand Down Expand Up @@ -979,9 +1013,14 @@ static final GencodeFuncotation createDefaultFuncotationsOnProblemVariant( final

/**
 * Reports whether the given transcript carries the {@code BASIC} feature tag.
 *
 * @param transcript The {@link GencodeGtfTranscriptFeature} to check.
 * @return The result of {@code hasTag} for {@code transcript} and {@link GencodeGTFFieldConstants.FeatureTag#BASIC}.
 */
private static boolean isBasic(final GencodeGtfTranscriptFeature transcript) {
    final GencodeGTFFieldConstants.FeatureTag basicTag = GencodeGTFFieldConstants.FeatureTag.BASIC;
    return hasTag(transcript, basicTag);
}

/**
 * Determines whether the given transcript carries the given feature tag.
 *
 * A transcript is considered tagged when at least one of its optional fields is named {@code "tag"} and has a
 * value equal to the string form of {@code tag}.
 *
 * @param transcript The {@link GencodeGtfTranscriptFeature} whose optional fields are inspected.
 * @param tag The {@link GencodeGTFFieldConstants.FeatureTag} to look for.
 * @return {@code true} if {@code transcript} has an optional {@code "tag"} field whose value matches {@code tag};
 *         {@code false} otherwise.
 */
private static boolean hasTag(final GencodeGtfTranscriptFeature transcript, final GencodeGTFFieldConstants.FeatureTag tag) {
    // Match against the requested tag only. A hard-coded comparison to BASIC here would make every
    // non-BASIC lookup (e.g. the MANE tags) return false.
    final String tagValue = tag.toString();
    return transcript.getOptionalFields().stream()
            .anyMatch( f -> f.getName().equals("tag") && f.getValue().equals(tagValue) );
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ public void testGetFuncotationFactoriesAndCreateFuncotationMapForVariant(final F
new FlankSettings(0, 0),
false,
FuncotatorUtils.DEFAULT_MIN_NUM_BASES_FOR_VALID_SEGMENT,
FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE)
FuncotatorUtils.DEFAULT_SPLICE_SITE_WINDOW_SIZE,
false)
);

for (int i = 0; i < entireVcf.getRight().size(); i++) {
Expand Down
Loading
Loading