Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions src/lambda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,13 @@ int main(int argc, char const ** argv)

--until; // undo the "+ 1" above

if ((std::string(argv[until]) == "searchp") || (std::string(argv[until]) == "searchn"))
std::string const subcommand_actual = argv[until];

if (subcommand_actual.starts_with("search"))
{
return searchMain(argc - until, argv + until);
}
else if ((std::string(argv[until]) == "mkindexp") || (std::string(argv[until]) == "mkindexn"))
else if (subcommand_actual.starts_with("mkindex"))
{
return mkindexMain(argc - until, argv + until);
}
Expand All @@ -88,24 +90,27 @@ void parseCommandLineMain(int argc, char const ** argv)
sharg::parser parser("lambda3", argc, argv, sharg::update_notifications::off);

parser.info.short_description = "Lambda, the Local Aligner for Massive Biological DatA.";
parser.info.synopsis.push_back("[\\fIOPTIONS\\fP] COMMAND [\\fICOMMAND-OPTIONS\\fP]");
parser.info.synopsis.push_back("lambda3 [\\fIOPTIONS\\fP] COMMAND [\\fICOMMAND-OPTIONS\\fP]");

sharedSetup(parser);

std::string command{};
parser.add_positional_option(
command,
sharg::config{
.description = "The sub-program to execute. See below.",
.validator = sharg::value_list_validator{"searchp", "searchn", "mkindexp", "mkindexn"}
.description = "The sub-program to execute. See above.",
.validator =
sharg::value_list_validator{"searchp", "searchn", "searchbs", "mkindexp", "mkindexn", "mkindexbs"}
});

parser.info.description.push_back("Available commands");
parser.info.description.push_back(
"\\fBsearchp \\fP– Perform a protein search (BLASTP, BLASTX, TBLASTN, TBLASTX).");
parser.info.description.push_back("\\fBsearchn \\fP– Perform a nucleotide search (BLASTN, MEGABLAST).");
parser.info.description.push_back("\\fBsearchbs \\fP– Perform a bisulfite search.");
parser.info.description.push_back("\\fBmkindexp \\fP– Create an index for protein searches.");
parser.info.description.push_back("\\fBmkindexn \\fP– Create an index for nucleotide searches.");
parser.info.description.push_back("\\fBmkindexbs\\fP– Create an index for bisulfite searches.");
parser.info.description.push_back(
"To view the help page for a specific command, simply run 'lambda command --help'.");

Expand Down
108 changes: 53 additions & 55 deletions src/mkindex_options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

#include <sharg/all.hpp>

#include "shared_options.hpp"

// --------------------------------------------------------------------------
// Class LambdaIndexerOptions
// --------------------------------------------------------------------------
Expand Down Expand Up @@ -54,17 +56,26 @@ struct LambdaIndexerOptions : public SharedOptions
// INDEXER
void parseCommandLine(LambdaIndexerOptions & options, int argc, char const ** argv)
{
std::string programName = "lambda3-" + std::string(argv[0]);
std::string const subcommand = std::string(argv[0]);
std::string const programName = "lambda3-" + subcommand;

// this is important for option handling:
options.nucleotide_mode = (std::string(argv[0]) == "mkindexn");
if (subcommand == "mkindexp")
options.domain = domain_t::protein;
else if (subcommand == "mkindexn")
options.domain = domain_t::nucleotide;
else if (subcommand == "mkindexbs")
options.domain = domain_t::bisulfite;
else
throw std::runtime_error{"Unknown subcommand."};

sharg::parser parser(programName, argc, argv, sharg::update_notifications::off);

parser.info.short_description = "the Local Aligner for Massive Biological DatA";

// Define usage line and long description.
parser.info.synopsis.push_back("[\\fIOPTIONS\\fP] \\-d DATABASE.fasta [-i INDEX.lba]\\fP");
parser.info.synopsis.push_back("lambda3 "s + subcommand +
" [\\fIOPTIONS\\fP] \\-d DATABASE.fasta [-i INDEX.lba]\\fP");

parser.info.description.push_back("This is the indexer command for creating lambda-compatible databases.");

Expand Down Expand Up @@ -162,51 +173,45 @@ void parseCommandLine(LambdaIndexerOptions & options, int argc, char const ** ar
std::string alphabetReductionTmp;
int geneticCodeTmp = 1;

if (options.nucleotide_mode)
{
alphabetReductionTmp = "dna4";
options.indexFileOptions.origAlph = AlphabetEnum::DNA5;
options.indexFileOptions.transAlph = AlphabetEnum::DNA5;
options.indexFileOptions.redAlph = AlphabetEnum::DNA4;

parser.add_section("Alphabet reduction");

parser.add_option(alphabetReductionTmp,
sharg::config{
.short_id = 'r',
.long_id = "alphabet-reduction",
.description = "Alphabet Reduction for seeding phase.",
.advanced = true,
.validator = sharg::value_list_validator{"none", "dna4", "dna3bs"}
});
}
else
switch (options.domain)
{
alphabetReductionTmp = "li10";
options.indexFileOptions.origAlph = AlphabetEnum::UNDEFINED;
options.indexFileOptions.transAlph = AlphabetEnum::AMINO_ACID;
options.indexFileOptions.redAlph = AlphabetEnum::LI10;

parser.add_section("Alphabet and Translation");

parser.add_option(inputAlphabetTmp,
sharg::config{
.short_id = 'a',
.long_id = "input-alphabet",
.description = "Alphabet of the database sequences (specify to override auto-detection); "
"if input is Dna, it will be translated.",
.advanced = true,
.validator = sharg::value_list_validator{"auto", "dna5", "aminoacid"}
});

parser.add_option(alphabetReductionTmp,
sharg::config{
.short_id = 'r',
.long_id = "alphabet-reduction",
.description = "Alphabet Reduction for seeding phase.",
.advanced = true,
.validator = sharg::value_list_validator{"none", "murphy10", "li10"}
});
case domain_t::protein:
alphabetReductionTmp = "li10";
options.indexFileOptions.origAlph = AlphabetEnum::UNDEFINED;
options.indexFileOptions.transAlph = AlphabetEnum::AMINO_ACID;
options.indexFileOptions.redAlph = AlphabetEnum::LI10;

parser.add_section("Alphabet and Translation");

parser.add_option(inputAlphabetTmp,
sharg::config{
.short_id = 'a',
.long_id = "input-alphabet",
.description = "Alphabet of the database sequences (specify to override "
"auto-detection); if input is Dna, it will be translated.",
.advanced = true,
.validator = sharg::value_list_validator{"auto", "dna5", "aminoacid"}
});

parser.add_option(alphabetReductionTmp,
sharg::config{
.short_id = 'r',
.long_id = "alphabet-reduction",
.description = "Alphabet Reduction for seeding phase.",
.advanced = true,
.validator = sharg::value_list_validator{"none", "murphy10", "li10"}
});
break;
case domain_t::nucleotide:
options.indexFileOptions.origAlph = AlphabetEnum::DNA5;
options.indexFileOptions.transAlph = AlphabetEnum::DNA5;
options.indexFileOptions.redAlph = AlphabetEnum::DNA4;
break;
case domain_t::bisulfite:
options.indexFileOptions.origAlph = AlphabetEnum::DNA5;
options.indexFileOptions.transAlph = AlphabetEnum::DNA5;
options.indexFileOptions.redAlph = AlphabetEnum::DNA3BS;
break;
}

parser.add_section("Remarks");
Expand All @@ -222,7 +227,7 @@ void parseCommandLine(LambdaIndexerOptions & options, int argc, char const ** ar
options.indexFileOptions.indexType = DbIndexType::FM_INDEX;

// set options for protein alphabet, genetic code and alphabet reduction
if (!options.nucleotide_mode)
if (options.domain == domain_t::protein)
{
options.indexFileOptions.origAlph = _alphabetNameToEnum(inputAlphabetTmp);
if (alphabetReductionTmp == "none")
Expand All @@ -231,13 +236,6 @@ void parseCommandLine(LambdaIndexerOptions & options, int argc, char const ** ar
options.indexFileOptions.redAlph = _alphabetNameToEnum(alphabetReductionTmp);
options.indexFileOptions.geneticCode = static_cast<bio::alphabet::genetic_code>(geneticCodeTmp);
}
else
{
if (alphabetReductionTmp == "none")
options.indexFileOptions.redAlph = AlphabetEnum::DNA5;
else
options.indexFileOptions.redAlph = _alphabetNameToEnum(alphabetReductionTmp);
}

setEnv("TMPDIR", options.tmpdir);

Expand Down
89 changes: 44 additions & 45 deletions src/search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,7 @@ template <DbIndexType c_indexType>
void argConv1(LambdaOptions const & options);

template <DbIndexType c_indexType, AlphabetEnum c_origSbjAlph>
void argConv2a(LambdaOptions const & options);

template <DbIndexType c_indexType, AlphabetEnum c_origSbjAlph>
void argConv2b(LambdaOptions const & options);
void argConv2(LambdaOptions const & options);

template <DbIndexType c_indexType, AlphabetEnum c_origSbjAlph, AlphabetEnum c_transAlph, AlphabetEnum c_redAlph>
void argConv3(LambdaOptions const & options);
Expand Down Expand Up @@ -175,13 +172,24 @@ void argConv0(LambdaOptions & options)
myPrint(options, 2, " reduced alphabet: ", _alphabetEnumToName(options.indexFileOptions.redAlph), "\n\n");
}

if ((options.nucleotide_mode) && (options.indexFileOptions.redAlph != AlphabetEnum::DNA5 &&
options.indexFileOptions.redAlph != AlphabetEnum::DNA4 &&
options.indexFileOptions.redAlph != AlphabetEnum::DNA3BS))
switch (options.domain)
{
throw std::runtime_error(
"You are attempting a nucleotide search on a protein index. "
"Did you want to use 'lambda3 searchp' instead?");
case domain_t::protein:
if (options.indexFileOptions.transAlph != AlphabetEnum::AMINO_ACID)
throw std::runtime_error{"Attempting to use nucleotide or bisulfite index for protein search."};
break;
case domain_t::nucleotide:
if (options.indexFileOptions.transAlph != AlphabetEnum::DNA5)
throw std::runtime_error{"Attempting to use protein index for nucleotide search."};
if (options.indexFileOptions.redAlph != AlphabetEnum::DNA4)
throw std::runtime_error{"Attempting to use bisulfite index for nucleotide search."};
break;
case domain_t::bisulfite:
if (options.indexFileOptions.transAlph != AlphabetEnum::DNA5)
throw std::runtime_error{"Attempting to use protein index for bisulfite search."};
if (options.indexFileOptions.redAlph != AlphabetEnum::DNA3BS)
throw std::runtime_error{"Attempting to use nucleotid index for bisulfite search."};
break;
}

// query file
Expand Down Expand Up @@ -249,46 +257,37 @@ void argConv0(LambdaOptions & options)
template <DbIndexType c_indexType>
void argConv1(LambdaOptions const & options)
{
if (options.nucleotide_mode)
{
return argConv2a<c_indexType, AlphabetEnum::DNA5>(options);
}
else
{
switch (options.indexFileOptions.origAlph)
{
case AlphabetEnum::DNA5:
return argConv2b<c_indexType, AlphabetEnum::DNA5>(options);
case AlphabetEnum::AMINO_ACID:
return argConv2b<c_indexType, AlphabetEnum::AMINO_ACID>(options);
default:
throw 53;
}
}
}

template <DbIndexType c_indexType, AlphabetEnum c_origSbjAlph>
void argConv2a(LambdaOptions const & options)
{
// transalph is always amino acid, unless in nucleotide_mode
switch (options.indexFileOptions.redAlph)
switch (options.domain)
{
case AlphabetEnum::DNA5:
return realMain<c_indexType, c_origSbjAlph, AlphabetEnum::DNA5, AlphabetEnum::DNA5, AlphabetEnum::DNA5>(
options);
case AlphabetEnum::DNA4:
return realMain<c_indexType, c_origSbjAlph, AlphabetEnum::DNA5, AlphabetEnum::DNA4, AlphabetEnum::DNA5>(
options);
case AlphabetEnum::DNA3BS:
return realMain<c_indexType, c_origSbjAlph, AlphabetEnum::DNA5, AlphabetEnum::DNA3BS, AlphabetEnum::DNA5>(
options);
default:
throw 555;
case domain_t::protein:
switch (options.indexFileOptions.origAlph)
{
case AlphabetEnum::DNA5:
return argConv2<c_indexType, AlphabetEnum::DNA5>(options);
case AlphabetEnum::AMINO_ACID:
return argConv2<c_indexType, AlphabetEnum::AMINO_ACID>(options);
default:
throw 53;
break;
}
break;
case domain_t::nucleotide:
return realMain<c_indexType,
AlphabetEnum::DNA5,
AlphabetEnum::DNA5,
AlphabetEnum::DNA4,
AlphabetEnum::DNA5>(options);
case domain_t::bisulfite:
return realMain<c_indexType,
AlphabetEnum::DNA5,
AlphabetEnum::DNA5,
AlphabetEnum::DNA3BS,
AlphabetEnum::DNA5>(options);
}
}

template <DbIndexType c_indexType, AlphabetEnum c_origSbjAlph>
void argConv2b(LambdaOptions const & options)
void argConv2(LambdaOptions const & options)
{
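// transalph is always amino acid here: argConv1 dispatches only the protein domain to this function (nucleotide and bisulfite go straight to realMain) -- NOTE(review): confirm there are no other callers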
switch (options.indexFileOptions.redAlph)
Expand Down
Loading