Skip to content

Commit

Permalink
Merge branch 'develop'
Browse files Browse the repository at this point in the history
  • Loading branch information
Michael Stromberg committed Jun 13, 2017
2 parents 53edff6 + f0b4a05 commit 82f37e9
Show file tree
Hide file tree
Showing 1,139 changed files with 78,094 additions and 75,408 deletions.
508 changes: 254 additions & 254 deletions .gitignore

Large diffs are not rendered by default.

98 changes: 49 additions & 49 deletions CacheUtils/CacheUtils.cs
Original file line number Diff line number Diff line change
@@ -1,49 +1,49 @@
using System.Collections.Generic;
using CacheUtils.CombineAndUpdateGenes;
using CacheUtils.CombineCacheDirectories;
using CacheUtils.CreateCache;
using CacheUtils.ExtractRegulatoryElements;
using CacheUtils.ExtractTranscripts;
using CacheUtils.GFF;
using CacheUtils.ParseVepCacheDirectory;
using CacheUtils.RegulatoryGFF;
using CacheUtils.UpdateMiniCacheFiles;
using NDesk.Options;
using VariantAnnotation.CommandLine;
using VariantAnnotation.Utilities;

namespace CacheUtils
{
    /// <summary>
    /// Command-line entry point for the cache utilities. Each sub-command name is
    /// mapped to the Run method of the tool that implements it.
    /// </summary>
    public class CacheUtils : TopLevelCommandLineHandler
    {
        /// <summary>
        /// Forwards the description, the executable name, the sub-command table, the
        /// authors string and the optional version provider to the base handler.
        /// </summary>
        private CacheUtils(string programDescription, Dictionary<string, TopLevelOption> ops, string authors,
            IVersionProvider provider = null)
            : base(programDescription, OutputHelper.GetExecutableName(), ops, authors, provider)
        {
        }

        /// <summary>
        /// Builds the sub-command table, parses the command line and returns the
        /// handler's exit code.
        /// </summary>
        public static int Main(string[] args)
        {
            // sub-command name -> (help text, handler); insertion order is kept as-is
            var commands = new Dictionary<string, TopLevelOption>
            {
                { "combine", new TopLevelOption("combine cache directories", CombineCacheDirectoriesMain.Run) },
                { "create", new TopLevelOption("create Nirvana cache files", CreateNirvanaDatabaseMain.Run) },
                { "extreg", new TopLevelOption("extracts regulatory regions", ExtractRegulatoryElementsMain.Run) },
                { "exttran", new TopLevelOption("extracts transcripts", ExtractTranscriptsMain.Run) },
                { "gff", new TopLevelOption("export transcripts to GFF", CreateGff.Run) },
                { "parse", new TopLevelOption("parses the VEP cache files", ParseVepCacheDirectoryMain.Run) },
                { "rgff", new TopLevelOption("export regulatory regions to GFF", CreateRegulatoryGff.Run) },
                { "gene", new TopLevelOption("updates genes in intermediate files", CombineAndUpdateGenesMain.Run) },
                { "update", new TopLevelOption("updates the mini-cache files", UpdateMiniCacheFilesMain.Run) }
            };

            var handler = new CacheUtils(
                "Utilities focused on querying the cache directory",
                commands,
                VariantAnnotation.DataStructures.Constants.Authors,
                new CacheVersionProvider());

            handler.ParseCommandLine(args);
            return handler.ExitCode;
        }
    }
}
using System.Collections.Generic;
using CacheUtils.CombineAndUpdateGenes;
using CacheUtils.CombineCacheDirectories;
using CacheUtils.CreateCache;
using CacheUtils.ExtractRegulatoryElements;
using CacheUtils.ExtractTranscripts;
using CacheUtils.GFF;
using CacheUtils.ParseVepCacheDirectory;
using CacheUtils.RegulatoryGFF;
using CacheUtils.UpdateMiniCacheFiles;
using CommandLine.Handlers;
using CommandLine.Utilities;
using CommandLine.VersionProvider;

namespace CacheUtils
{
    /// <summary>
    /// Command-line entry point for the cache utilities. Each sub-command name is
    /// mapped to the Run method of the tool that implements it.
    /// </summary>
    public class CacheUtils : TopLevelCommandLineHandler
    {
        /// <summary>
        /// Forwards the description, the executable name, the sub-command table, the
        /// authors string and the optional version provider to the base handler.
        /// </summary>
        private CacheUtils(string programDescription, Dictionary<string, TopLevelOption> ops, string authors,
            IVersionProvider provider = null)
            : base(programDescription, OutputHelper.GetExecutableName(), ops, authors, provider)
        {
        }

        /// <summary>
        /// Builds the sub-command table, parses the command line and returns the
        /// handler's exit code.
        /// </summary>
        public static int Main(string[] args)
        {
            // sub-command name -> (help text, handler); insertion order is kept as-is
            var commands = new Dictionary<string, TopLevelOption>
            {
                { "combine", new TopLevelOption("combine cache directories", CombineCacheDirectoriesMain.Run) },
                { "create", new TopLevelOption("create Nirvana cache files", CreateNirvanaDatabaseMain.Run) },
                { "extreg", new TopLevelOption("extracts regulatory regions", ExtractRegulatoryElementsMain.Run) },
                { "exttran", new TopLevelOption("extracts transcripts", ExtractTranscriptsMain.Run) },
                { "gff", new TopLevelOption("export transcripts to GFF", CreateGff.Run) },
                { "parse", new TopLevelOption("parses the VEP cache files", ParseVepCacheDirectoryMain.Run) },
                { "rgff", new TopLevelOption("export regulatory regions to GFF", CreateRegulatoryGff.Run) },
                { "gene", new TopLevelOption("updates genes in intermediate files", CombineAndUpdateGenesMain.Run) },
                { "update", new TopLevelOption("updates the mini-cache files", UpdateMiniCacheFilesMain.Run) }
            };

            var handler = new CacheUtils(
                "Utilities focused on querying the cache directory",
                commands,
                VariantAnnotation.DataStructures.Constants.Authors,
                new CacheVersionProvider());

            handler.ParseCommandLine(args);
            return handler.ExitCode;
        }
    }
}
34 changes: 21 additions & 13 deletions CacheUtils/CacheUtils.csproj
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Build settings: produces an executable for netcoreapp1.1, written to ..\bin\<Configuration>. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp1.1</TargetFramework>
<OutputPath>..\bin\$(Configuration)</OutputPath>
</PropertyGroup>
<!-- Sibling projects this tool is compiled against. -->
<ItemGroup>
<ProjectReference Include="..\ErrorHandling\ErrorHandling.csproj" />
<ProjectReference Include="..\NDesk.Options\NDesk.Options.csproj" />
<ProjectReference Include="..\VariantAnnotation.Interface\VariantAnnotation.Interface.csproj" />
<ProjectReference Include="..\VariantAnnotation\VariantAnnotation.csproj" />
</ItemGroup>
<!-- Pulls in CommonAssemblyInfo.props from the VariantAnnotation project (presumably shared assembly metadata; confirm against that file). -->
<Import Project="..\VariantAnnotation\CommonAssemblyInfo.props" />
<Project Sdk="Microsoft.NET.Sdk">
<!-- OutputType is set to exe only for the Debug|AnyCPU and Release|AnyCPU combinations below. -->
<!-- NOTE(review): any other Configuration|Platform pair gets no OutputType from this file; confirm that is intended. -->
<PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
<OutputType>exe</OutputType>
</PropertyGroup>
<PropertyGroup Label="Configuration" Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
<OutputType>exe</OutputType>
</PropertyGroup>
<!-- Unconditional build settings: targets netcoreapp1.1 and writes output to ..\bin\<Configuration>. -->
<!-- NOTE(review): OutputTypeEx and the empty ApplicationIcon / StartupObject elements look tool-generated; confirm they are needed. -->
<PropertyGroup>
<TargetFramework>netcoreapp1.1</TargetFramework>
<OutputPath>..\bin\$(Configuration)</OutputPath>
<OutputTypeEx>exe</OutputTypeEx>
<ApplicationIcon />
<StartupObject />
</PropertyGroup>
<!-- Sibling projects this tool is compiled against. -->
<ItemGroup>
<ProjectReference Include="..\ErrorHandling\ErrorHandling.csproj" />
<ProjectReference Include="..\CommandLine\CommandLine.csproj" />
<ProjectReference Include="..\VariantAnnotation.Interface\VariantAnnotation.Interface.csproj" />
<ProjectReference Include="..\VariantAnnotation\VariantAnnotation.csproj" />
</ItemGroup>
<!-- Pulls in CommonAssemblyInfo.props from the VariantAnnotation project (presumably shared assembly metadata; confirm against that file). -->
<Import Project="..\VariantAnnotation\CommonAssemblyInfo.props" />
</Project>
25 changes: 13 additions & 12 deletions CacheUtils/CacheVersionProvider.cs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
using VariantAnnotation.FileHandling.TranscriptCache;
using VariantAnnotation.Utilities;

namespace CacheUtils
{
    /// <summary>
    /// Supplies the program and data version strings for the cache utilities.
    /// </summary>
    public class CacheVersionProvider : IVersionProvider
    {
        /// <summary>
        /// Returns "Nirvana " followed by the informational version.
        /// </summary>
        public string GetProgramVersion()
        {
            return string.Format("Nirvana {0}", CommandLineUtilities.InformationalVersion);
        }

        /// <summary>
        /// Returns "Cache version: " followed by the cache data version.
        /// </summary>
        public string GetDataVersion()
        {
            return string.Format("Cache version: {0}", CacheConstants.DataVersion);
        }
    }
}
using CommandLine.Utilities;
using CommandLine.VersionProvider;
using VariantAnnotation.FileHandling.TranscriptCache;

namespace CacheUtils
{
    /// <summary>
    /// Supplies the program and data version strings for the cache utilities.
    /// </summary>
    public class CacheVersionProvider : IVersionProvider
    {
        /// <summary>
        /// Returns "Nirvana " followed by the informational version.
        /// </summary>
        public string GetProgramVersion()
        {
            return string.Format("Nirvana {0}", CommandLineUtilities.InformationalVersion);
        }

        /// <summary>
        /// Returns "Cache version: " followed by the cache data version.
        /// </summary>
        public string GetDataVersion()
        {
            return string.Format("Cache version: {0}", CacheConstants.DataVersion);
        }
    }
}
170 changes: 85 additions & 85 deletions CacheUtils/CombineAndUpdateGenes/Algorithms/GeneFlattener.cs
Original file line number Diff line number Diff line change
@@ -1,85 +1,85 @@
using System;
using System.Collections.Generic;
using System.Linq;
using CacheUtils.CombineAndUpdateGenes.DataStructures;
using CacheUtils.CombineAndUpdateGenes.Utilities;
using ErrorHandling.Exceptions;
using VariantAnnotation.Algorithms;
using VariantAnnotation.DataStructures;

namespace CacheUtils.CombineAndUpdateGenes.Algorithms
{
    /// <summary>
    /// Collapses genes that share a gene ID into single genes whose coordinates
    /// span every merged entry.
    /// </summary>
    public class GeneFlattener
    {
        private readonly List<MutableGene> _genes;
        private readonly string _description;
        private readonly bool _isEnsembl;
        private readonly bool _showOutput;

        /// <summary>
        /// Stores the genes to flatten. The data source is taken from the first gene;
        /// an empty list is treated as Ensembl.
        /// </summary>
        /// <param name="genes">genes to flatten; their Invalid flag is modified by <see cref="Flatten"/></param>
        /// <param name="description">label used in the console summary line</param>
        /// <param name="showOutput">when true, a summary line is written to the console</param>
        public GeneFlattener(List<MutableGene> genes, string description, bool showOutput = true)
        {
            _genes = genes;
            _description = description;
            _isEnsembl = genes.Count == 0 || genes[0].TranscriptDataSource == TranscriptDataSource.Ensembl;
            _showOutput = showOutput;
        }

        /// <summary>
        /// Walks the genes in order; every gene that is not yet marked invalid seeds a
        /// flattened gene that absorbs the matching genes with the same gene ID.
        /// </summary>
        /// <param name="overlapStart">start of a fixed overlap interval, or -1</param>
        /// <param name="overlapEnd">end of a fixed overlap interval, or -1</param>
        /// <returns>the flattened genes, in seed order</returns>
        /// <exception cref="UserErrorException">a gene ID is missing from the ID lookup</exception>
        public List<MutableGene> Flatten(int overlapStart = -1, int overlapEnd = -1)
        {
            var flattened = new List<MutableGene>();
            var lookup = GeneUtilities.GetGenesById(_genes, _isEnsembl);

            foreach (var seed in _genes)
            {
                // genes absorbed by an earlier seed have been flagged and are skipped
                if (seed.Invalid) continue;

                string id;
                if (_isEnsembl) id = seed.EnsemblId.ToString();
                else id = seed.EntrezGeneId.ToString();

                List<MutableGene> sameIdGenes;
                bool found = lookup.TryGetValue(id, out sameIdGenes);
                if (!found) throw new UserErrorException($"Unable to find similar genes for {id}");

                var merged = GetFlattenedGene(seed, sameIdGenes, overlapStart, overlapEnd);
                flattened.Add(merged);
            }

            if (_showOutput)
            {
                Console.WriteLine($" - {_description}: {flattened.Count} genes.");
            }

            return flattened;
        }

        /// <summary>
        /// Clones the seed and widens the clone over every valid gene on the same strand
        /// and reference that passes the overlap check. Absorbed genes are marked invalid.
        /// </summary>
        private static MutableGene GetFlattenedGene(MutableGene seedGene, List<MutableGene> genesWithSameGeneId,
            int overlapStart, int overlapEnd)
        {
            var merged = MutableGene.Clone(seedGene);

            // the fixed interval is only used when both ends were supplied
            bool hasFixedInterval = overlapStart != -1 && overlapEnd != -1;

            foreach (var candidate in genesWithSameGeneId)
            {
                if (candidate.Invalid) continue;
                if (merged.OnReverseStrand != candidate.OnReverseStrand) continue;
                if (merged.ReferenceIndex != candidate.ReferenceIndex) continue;

                // compare against the fixed interval, or else against the growing merged gene
                bool overlaps = hasFixedInterval
                    ? Overlap.Partial(overlapStart, overlapEnd, candidate.Start, candidate.End)
                    : Overlap.Partial(merged.Start, merged.End, candidate.Start, candidate.End);
                if (!overlaps) continue;

                // extend the merged gene to cover the candidate
                if (candidate.Start < merged.Start) merged.Start = candidate.Start;
                if (candidate.End > merged.End) merged.End = candidate.End;

                candidate.Invalid = true;
            }

            return merged;
        }
    }
}
using System;
using System.Collections.Generic;
using System.Linq;
using CacheUtils.CombineAndUpdateGenes.DataStructures;
using CacheUtils.CombineAndUpdateGenes.Utilities;
using ErrorHandling.Exceptions;
using VariantAnnotation.Algorithms;
using VariantAnnotation.DataStructures.Transcript;

namespace CacheUtils.CombineAndUpdateGenes.Algorithms
{
    /// <summary>
    /// Collapses genes that share a gene ID into single genes whose coordinates
    /// span every merged entry.
    /// </summary>
    public class GeneFlattener
    {
        private readonly List<MutableGene> _genes;
        private readonly string _description;
        private readonly bool _isEnsembl;
        private readonly bool _showOutput;

        /// <summary>
        /// Stores the genes to flatten. The data source is taken from the first gene;
        /// an empty list is treated as Ensembl.
        /// </summary>
        /// <param name="genes">genes to flatten; their Invalid flag is modified by <see cref="Flatten"/></param>
        /// <param name="description">label used in the console summary line</param>
        /// <param name="showOutput">when true, a summary line is written to the console</param>
        public GeneFlattener(List<MutableGene> genes, string description, bool showOutput = true)
        {
            _genes = genes;
            _description = description;
            _isEnsembl = genes.Count == 0 || genes[0].TranscriptDataSource == TranscriptDataSource.Ensembl;
            _showOutput = showOutput;
        }

        /// <summary>
        /// Walks the genes in order; every gene that is not yet marked invalid seeds a
        /// flattened gene that absorbs the matching genes with the same gene ID.
        /// </summary>
        /// <param name="overlapStart">start of a fixed overlap interval, or -1</param>
        /// <param name="overlapEnd">end of a fixed overlap interval, or -1</param>
        /// <returns>the flattened genes, in seed order</returns>
        /// <exception cref="UserErrorException">a gene ID is missing from the ID lookup</exception>
        public List<MutableGene> Flatten(int overlapStart = -1, int overlapEnd = -1)
        {
            var flattened = new List<MutableGene>();
            var lookup = GeneUtilities.GetGenesById(_genes, _isEnsembl);

            foreach (var seed in _genes)
            {
                // genes absorbed by an earlier seed have been flagged and are skipped
                if (seed.Invalid) continue;

                string id;
                if (_isEnsembl) id = seed.EnsemblId.ToString();
                else id = seed.EntrezGeneId.ToString();

                List<MutableGene> sameIdGenes;
                bool found = lookup.TryGetValue(id, out sameIdGenes);
                if (!found) throw new UserErrorException($"Unable to find similar genes for {id}");

                var merged = GetFlattenedGene(seed, sameIdGenes, overlapStart, overlapEnd);
                flattened.Add(merged);
            }

            if (_showOutput)
            {
                Console.WriteLine($" - {_description}: {flattened.Count} genes.");
            }

            return flattened;
        }

        /// <summary>
        /// Clones the seed and widens the clone over every valid gene on the same strand
        /// and reference that passes the overlap check. Absorbed genes are marked invalid.
        /// </summary>
        private static MutableGene GetFlattenedGene(MutableGene seedGene, List<MutableGene> genesWithSameGeneId,
            int overlapStart, int overlapEnd)
        {
            var merged = MutableGene.Clone(seedGene);

            // the fixed interval is only used when both ends were supplied
            bool hasFixedInterval = overlapStart != -1 && overlapEnd != -1;

            foreach (var candidate in genesWithSameGeneId)
            {
                if (candidate.Invalid) continue;
                if (merged.OnReverseStrand != candidate.OnReverseStrand) continue;
                if (merged.ReferenceIndex != candidate.ReferenceIndex) continue;

                // compare against the fixed interval, or else against the growing merged gene
                bool overlaps = hasFixedInterval
                    ? Overlap.Partial(overlapStart, overlapEnd, candidate.Start, candidate.End)
                    : Overlap.Partial(merged.Start, merged.End, candidate.Start, candidate.End);
                if (!overlaps) continue;

                // extend the merged gene to cover the candidate
                if (candidate.Start < merged.Start) merged.Start = candidate.Start;
                if (candidate.End > merged.End) merged.End = candidate.End;

                candidate.Invalid = true;
            }

            return merged;
        }
    }
}
Loading

0 comments on commit 82f37e9

Please sign in to comment.