Skip to content

Commit dba2169

Browse files
committed
add searchbs and mkindexbs as proper commands
1 parent d11d318 commit dba2169

File tree

7 files changed

+269
-179
lines changed

7 files changed

+269
-179
lines changed

src/lambda.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,13 @@ int main(int argc, char const ** argv)
6666

6767
--until; // undo the "+ 1" above
6868

69-
if ((std::string(argv[until]) == "searchp") || (std::string(argv[until]) == "searchn"))
69+
std::string const subcommand_actual = argv[until];
70+
71+
if (subcommand_actual.starts_with("search"))
7072
{
7173
return searchMain(argc - until, argv + until);
7274
}
73-
else if ((std::string(argv[until]) == "mkindexp") || (std::string(argv[until]) == "mkindexn"))
75+
else if (subcommand_actual.starts_with("mkindex"))
7476
{
7577
return mkindexMain(argc - until, argv + until);
7678
}
@@ -88,24 +90,27 @@ void parseCommandLineMain(int argc, char const ** argv)
8890
sharg::parser parser("lambda3", argc, argv, sharg::update_notifications::off);
8991

9092
parser.info.short_description = "Lambda, the Local Aligner for Massive Biological DatA.";
91-
parser.info.synopsis.push_back("[\\fIOPTIONS\\fP] COMMAND [\\fICOMMAND-OPTIONS\\fP]");
93+
parser.info.synopsis.push_back("lambda3 [\\fIOPTIONS\\fP] COMMAND [\\fICOMMAND-OPTIONS\\fP]");
9294

9395
sharedSetup(parser);
9496

9597
std::string command{};
9698
parser.add_positional_option(
9799
command,
98100
sharg::config{
99-
.description = "The sub-program to execute. See below.",
100-
.validator = sharg::value_list_validator{"searchp", "searchn", "mkindexp", "mkindexn"}
101+
.description = "The sub-program to execute. See above.",
102+
.validator =
103+
sharg::value_list_validator{"searchp", "searchn", "searchbs", "mkindexp", "mkindexn", "mkindexbs"}
101104
});
102105

103106
parser.info.description.push_back("Available commands");
104107
parser.info.description.push_back(
105108
"\\fBsearchp \\fP– Perform a protein search (BLASTP, BLASTX, TBLASTN, TBLASTX).");
106109
parser.info.description.push_back("\\fBsearchn \\fP– Perform a nucleotide search (BLASTN, MEGABLAST).");
110+
parser.info.description.push_back("\\fBsearchbs \\fP– Perform a bisulfite search.");
107111
parser.info.description.push_back("\\fBmkindexp \\fP– Create an index for protein searches.");
108112
parser.info.description.push_back("\\fBmkindexn \\fP– Create an index for nucleotide searches.");
113+
parser.info.description.push_back("\\fBmkindexbs\\fP– Create an index for bisulfite searches.");
109114
parser.info.description.push_back(
110115
"To view the help page for a specific command, simply run 'lambda command --help'.");
111116

src/mkindex_options.hpp

Lines changed: 53 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727

2828
#include <sharg/all.hpp>
2929

30+
#include "shared_options.hpp"
31+
3032
// --------------------------------------------------------------------------
3133
// Class LambdaIndexerOptions
3234
// --------------------------------------------------------------------------
@@ -54,17 +56,26 @@ struct LambdaIndexerOptions : public SharedOptions
5456
// INDEXER
5557
void parseCommandLine(LambdaIndexerOptions & options, int argc, char const ** argv)
5658
{
57-
std::string programName = "lambda3-" + std::string(argv[0]);
59+
std::string const subcommand = std::string(argv[0]);
60+
std::string const programName = "lambda3-" + subcommand;
5861

5962
// this is important for option handling:
60-
options.nucleotide_mode = (std::string(argv[0]) == "mkindexn");
63+
if (subcommand == "mkindexp")
64+
options.domain = domain_t::protein;
65+
else if (subcommand == "mkindexn")
66+
options.domain = domain_t::nucleotide;
67+
else if (subcommand == "mkindexbs")
68+
options.domain = domain_t::bisulfite;
69+
else
70+
throw std::runtime_error{"Unknown subcommand."};
6171

6272
sharg::parser parser(programName, argc, argv, sharg::update_notifications::off);
6373

6474
parser.info.short_description = "the Local Aligner for Massive Biological DatA";
6575

6676
// Define usage line and long description.
67-
parser.info.synopsis.push_back("[\\fIOPTIONS\\fP] \\-d DATABASE.fasta [-i INDEX.lba]\\fP");
77+
parser.info.synopsis.push_back("lambda3 "s + subcommand +
78+
" [\\fIOPTIONS\\fP] \\-d DATABASE.fasta [-i INDEX.lba]\\fP");
6879

6980
parser.info.description.push_back("This is the indexer command for creating lambda-compatible databases.");
7081

@@ -162,51 +173,45 @@ void parseCommandLine(LambdaIndexerOptions & options, int argc, char const ** ar
162173
std::string alphabetReductionTmp;
163174
int geneticCodeTmp = 1;
164175

165-
if (options.nucleotide_mode)
166-
{
167-
alphabetReductionTmp = "dna4";
168-
options.indexFileOptions.origAlph = AlphabetEnum::DNA5;
169-
options.indexFileOptions.transAlph = AlphabetEnum::DNA5;
170-
options.indexFileOptions.redAlph = AlphabetEnum::DNA4;
171-
172-
parser.add_section("Alphabet reduction");
173-
174-
parser.add_option(alphabetReductionTmp,
175-
sharg::config{
176-
.short_id = 'r',
177-
.long_id = "alphabet-reduction",
178-
.description = "Alphabet Reduction for seeding phase.",
179-
.advanced = true,
180-
.validator = sharg::value_list_validator{"none", "dna4", "dna3bs"}
181-
});
182-
}
183-
else
176+
switch (options.domain)
184177
{
185-
alphabetReductionTmp = "li10";
186-
options.indexFileOptions.origAlph = AlphabetEnum::UNDEFINED;
187-
options.indexFileOptions.transAlph = AlphabetEnum::AMINO_ACID;
188-
options.indexFileOptions.redAlph = AlphabetEnum::LI10;
189-
190-
parser.add_section("Alphabet and Translation");
191-
192-
parser.add_option(inputAlphabetTmp,
193-
sharg::config{
194-
.short_id = 'a',
195-
.long_id = "input-alphabet",
196-
.description = "Alphabet of the database sequences (specify to override auto-detection); "
197-
"if input is Dna, it will be translated.",
198-
.advanced = true,
199-
.validator = sharg::value_list_validator{"auto", "dna5", "aminoacid"}
200-
});
201-
202-
parser.add_option(alphabetReductionTmp,
203-
sharg::config{
204-
.short_id = 'r',
205-
.long_id = "alphabet-reduction",
206-
.description = "Alphabet Reduction for seeding phase.",
207-
.advanced = true,
208-
.validator = sharg::value_list_validator{"none", "murphy10", "li10"}
209-
});
178+
case domain_t::protein:
179+
alphabetReductionTmp = "li10";
180+
options.indexFileOptions.origAlph = AlphabetEnum::UNDEFINED;
181+
options.indexFileOptions.transAlph = AlphabetEnum::AMINO_ACID;
182+
options.indexFileOptions.redAlph = AlphabetEnum::LI10;
183+
184+
parser.add_section("Alphabet and Translation");
185+
186+
parser.add_option(inputAlphabetTmp,
187+
sharg::config{
188+
.short_id = 'a',
189+
.long_id = "input-alphabet",
190+
.description = "Alphabet of the database sequences (specify to override "
191+
"auto-detection); if input is Dna, it will be translated.",
192+
.advanced = true,
193+
.validator = sharg::value_list_validator{"auto", "dna5", "aminoacid"}
194+
});
195+
196+
parser.add_option(alphabetReductionTmp,
197+
sharg::config{
198+
.short_id = 'r',
199+
.long_id = "alphabet-reduction",
200+
.description = "Alphabet Reduction for seeding phase.",
201+
.advanced = true,
202+
.validator = sharg::value_list_validator{"none", "murphy10", "li10"}
203+
});
204+
break;
205+
case domain_t::nucleotide:
206+
options.indexFileOptions.origAlph = AlphabetEnum::DNA5;
207+
options.indexFileOptions.transAlph = AlphabetEnum::DNA5;
208+
options.indexFileOptions.redAlph = AlphabetEnum::DNA4;
209+
break;
210+
case domain_t::bisulfite:
211+
options.indexFileOptions.origAlph = AlphabetEnum::DNA5;
212+
options.indexFileOptions.transAlph = AlphabetEnum::DNA5;
213+
options.indexFileOptions.redAlph = AlphabetEnum::DNA3BS;
214+
break;
210215
}
211216

212217
parser.add_section("Remarks");
@@ -222,7 +227,7 @@ void parseCommandLine(LambdaIndexerOptions & options, int argc, char const ** ar
222227
options.indexFileOptions.indexType = DbIndexType::FM_INDEX;
223228

224229
// set options for protein alphabet, genetic code and alphabet reduction
225-
if (!options.nucleotide_mode)
230+
if (options.domain == domain_t::protein)
226231
{
227232
options.indexFileOptions.origAlph = _alphabetNameToEnum(inputAlphabetTmp);
228233
if (alphabetReductionTmp == "none")
@@ -231,13 +236,6 @@ void parseCommandLine(LambdaIndexerOptions & options, int argc, char const ** ar
231236
options.indexFileOptions.redAlph = _alphabetNameToEnum(alphabetReductionTmp);
232237
options.indexFileOptions.geneticCode = static_cast<bio::alphabet::genetic_code>(geneticCodeTmp);
233238
}
234-
else
235-
{
236-
if (alphabetReductionTmp == "none")
237-
options.indexFileOptions.redAlph = AlphabetEnum::DNA5;
238-
else
239-
options.indexFileOptions.redAlph = _alphabetNameToEnum(alphabetReductionTmp);
240-
}
241239

242240
setEnv("TMPDIR", options.tmpdir);
243241

src/search.cpp

Lines changed: 44 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,7 @@ template <DbIndexType c_indexType>
5454
void argConv1(LambdaOptions const & options);
5555

5656
template <DbIndexType c_indexType, AlphabetEnum c_origSbjAlph>
57-
void argConv2a(LambdaOptions const & options);
58-
59-
template <DbIndexType c_indexType, AlphabetEnum c_origSbjAlph>
60-
void argConv2b(LambdaOptions const & options);
57+
void argConv2(LambdaOptions const & options);
6158

6259
template <DbIndexType c_indexType, AlphabetEnum c_origSbjAlph, AlphabetEnum c_transAlph, AlphabetEnum c_redAlph>
6360
void argConv3(LambdaOptions const & options);
@@ -175,13 +172,24 @@ void argConv0(LambdaOptions & options)
175172
myPrint(options, 2, " reduced alphabet: ", _alphabetEnumToName(options.indexFileOptions.redAlph), "\n\n");
176173
}
177174

178-
if ((options.nucleotide_mode) && (options.indexFileOptions.redAlph != AlphabetEnum::DNA5 &&
179-
options.indexFileOptions.redAlph != AlphabetEnum::DNA4 &&
180-
options.indexFileOptions.redAlph != AlphabetEnum::DNA3BS))
175+
switch (options.domain)
181176
{
182-
throw std::runtime_error(
183-
"You are attempting a nucleotide search on a protein index. "
184-
"Did you want to use 'lambda3 searchp' instead?");
177+
case domain_t::protein:
178+
if (options.indexFileOptions.transAlph != AlphabetEnum::AMINO_ACID)
179+
throw std::runtime_error{"Attempting to use nucleotide or bisulfite index for protein search."};
180+
break;
181+
case domain_t::nucleotide:
182+
if (options.indexFileOptions.transAlph != AlphabetEnum::DNA5)
183+
throw std::runtime_error{"Attempting to use protein index for nucleotide search."};
184+
if (options.indexFileOptions.redAlph != AlphabetEnum::DNA4)
185+
throw std::runtime_error{"Attempting to use bisulfite index for nucleotide search."};
186+
break;
187+
case domain_t::bisulfite:
188+
if (options.indexFileOptions.transAlph != AlphabetEnum::DNA5)
189+
throw std::runtime_error{"Attempting to use protein index for bisulfite search."};
190+
if (options.indexFileOptions.redAlph != AlphabetEnum::DNA3BS)
191+
throw std::runtime_error{"Attempting to use nucleotid index for bisulfite search."};
192+
break;
185193
}
186194

187195
// query file
@@ -249,46 +257,37 @@ void argConv0(LambdaOptions & options)
249257
template <DbIndexType c_indexType>
250258
void argConv1(LambdaOptions const & options)
251259
{
252-
if (options.nucleotide_mode)
253-
{
254-
return argConv2a<c_indexType, AlphabetEnum::DNA5>(options);
255-
}
256-
else
257-
{
258-
switch (options.indexFileOptions.origAlph)
259-
{
260-
case AlphabetEnum::DNA5:
261-
return argConv2b<c_indexType, AlphabetEnum::DNA5>(options);
262-
case AlphabetEnum::AMINO_ACID:
263-
return argConv2b<c_indexType, AlphabetEnum::AMINO_ACID>(options);
264-
default:
265-
throw 53;
266-
}
267-
}
268-
}
269-
270-
template <DbIndexType c_indexType, AlphabetEnum c_origSbjAlph>
271-
void argConv2a(LambdaOptions const & options)
272-
{
273-
// transalph is always amino acid, unless in nucleotide_mode
274-
switch (options.indexFileOptions.redAlph)
260+
switch (options.domain)
275261
{
276-
case AlphabetEnum::DNA5:
277-
return realMain<c_indexType, c_origSbjAlph, AlphabetEnum::DNA5, AlphabetEnum::DNA5, AlphabetEnum::DNA5>(
278-
options);
279-
case AlphabetEnum::DNA4:
280-
return realMain<c_indexType, c_origSbjAlph, AlphabetEnum::DNA5, AlphabetEnum::DNA4, AlphabetEnum::DNA5>(
281-
options);
282-
case AlphabetEnum::DNA3BS:
283-
return realMain<c_indexType, c_origSbjAlph, AlphabetEnum::DNA5, AlphabetEnum::DNA3BS, AlphabetEnum::DNA5>(
284-
options);
285-
default:
286-
throw 555;
262+
case domain_t::protein:
263+
switch (options.indexFileOptions.origAlph)
264+
{
265+
case AlphabetEnum::DNA5:
266+
return argConv2<c_indexType, AlphabetEnum::DNA5>(options);
267+
case AlphabetEnum::AMINO_ACID:
268+
return argConv2<c_indexType, AlphabetEnum::AMINO_ACID>(options);
269+
default:
270+
throw 53;
271+
break;
272+
}
273+
break;
274+
case domain_t::nucleotide:
275+
return realMain<c_indexType,
276+
AlphabetEnum::DNA5,
277+
AlphabetEnum::DNA5,
278+
AlphabetEnum::DNA4,
279+
AlphabetEnum::DNA5>(options);
280+
case domain_t::bisulfite:
281+
return realMain<c_indexType,
282+
AlphabetEnum::DNA5,
283+
AlphabetEnum::DNA5,
284+
AlphabetEnum::DNA3BS,
285+
AlphabetEnum::DNA5>(options);
287286
}
288287
}
289288

290289
template <DbIndexType c_indexType, AlphabetEnum c_origSbjAlph>
291-
void argConv2b(LambdaOptions const & options)
290+
void argConv2(LambdaOptions const & options)
292291
{
293292
// transalph is always amino acid, unless in nucleotide_mode
294293
switch (options.indexFileOptions.redAlph)

0 commit comments

Comments
 (0)