Skip to content

Commit

Permalink
Merge pull request #240 from vasudeva8/44-1
Browse files Browse the repository at this point in the history
GA4GHTT-232 - vcf4.4 initial changes
  • Loading branch information
tcezard authored Mar 20, 2024
2 parents b1d57e5 + 313458b commit ee919cc
Show file tree
Hide file tree
Showing 327 changed files with 39,627 additions and 45 deletions.
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ set (MOD_VCF_SOURCES
inc/vcf/validator_detail_v41.hpp
inc/vcf/validator_detail_v42.hpp
inc/vcf/validator_detail_v43.hpp
inc/vcf/validator_detail_v44.hpp
inc/vcf/validator.hpp

src/fasta/faidx.cpp
Expand Down Expand Up @@ -293,6 +294,7 @@ endif ()
# Parser test sources, one variable per VCF version (v4.1 through v4.4).
set (V41_TESTS test/vcf/parser_v41_test.cpp)
set (V42_TESTS test/vcf/parser_v42_test.cpp)
set (V43_TESTS test/vcf/parser_v43_test.cpp)
set (V44_TESTS test/vcf/parser_v44_test.cpp)
set (ALL_TESTS
test/assembly_report/assembly_report_test.cpp
test/fasta/faidx_test.cpp
Expand All @@ -309,6 +311,7 @@ set (ALL_TESTS
test/vcf/parser_v41_test.cpp
test/vcf/parser_v42_test.cpp
test/vcf/parser_v43_test.cpp
test/vcf/parser_v44_test.cpp
test/vcf/predefined_info_tags_test.cpp
test/vcf/predefined_format_tags_test.cpp
test/vcf/record_cache_test.cpp
Expand All @@ -332,6 +335,11 @@ target_link_libraries (test_validator_v43 ${LIBRARIES_TO_LINK})
enable_testing ()
add_test (NAME ValidatorTests_v43 COMMAND test_validator_v43)

# Test binary built from the shared test entry point plus the sources in V44_TESTS.
add_executable (test_validator_v44 test/main_test.cpp ${V44_TESTS})
target_link_libraries (test_validator_v44 ${LIBRARIES_TO_LINK})
enable_testing ()
# Register the binary with CTest under the name ValidatorTests_v44.
add_test (NAME ValidatorTests_v44 COMMAND test_validator_v44)

add_executable (test_validation_suite test/main_test.cpp ${ALL_TESTS})
target_link_libraries (test_validation_suite ${LIBRARIES_TO_LINK})
enable_testing ()
Expand Down
1 change: 1 addition & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ test_script:
- cmd: bin\test_validator_v41.exe
- cmd: bin\test_validator_v42.exe
- cmd: bin\test_validator_v43.exe
- cmd: bin\test_validator_v44.exe
- cmd: bin\test_validation_suite.exe

artifacts:
Expand Down
4 changes: 4 additions & 0 deletions docs/developer-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ executable binaries:

![architecture image](img/VCF-validator-architecture.png)

If the dependent libraries are installed in non-standard paths, add their
location to `LIBRARY_PATH`, for example:
`export LIBRARY_PATH=$LIBRARY_PATH:<path to the required library's lib directory>`

The role of Ragel and ODB in this project is explained further below.

## Code structure
Expand Down
75 changes: 74 additions & 1 deletion inc/vcf/file_structure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ namespace ebi
{
v41,
v42,
v43
v43,
v44
};

enum class RecordType
Expand Down Expand Up @@ -159,6 +160,53 @@ namespace ebi
{ THOUSAND_G, { FLAG, "0" } }
};

// Predefined INFO tags for VCF v4.4.
// Maps each tag ID to a { Type, Number } pair; Number is either a literal
// count ("0", "1", "2", "4"), one of the symbolic constants A / R, or
// UNKNOWN_CARDINALITY.
const std::map<std::string, std::pair<std::string, std::string>> info_v44 = {
{ AA, { STRING, "1" } },
{ AC, { INTEGER, A } },
{ AD, { INTEGER, R } },
{ ADF, { INTEGER, R } },
{ ADR, { INTEGER, R } },
{ AF, { FLOAT, A } },
{ AN, { INTEGER, "1" } },
{ BKPTID, { STRING, UNKNOWN_CARDINALITY } },
{ BQ, { FLOAT, "1" } },
{ CICN, { INTEGER, "2" } },
{ CICNADJ, { INTEGER, UNKNOWN_CARDINALITY } },
{ CIEND, { INTEGER, "2" } },
{ CIGAR, { STRING, A } },
{ CILEN, { INTEGER, "2" } },
{ CIPOS, { INTEGER, "2" } },
{ CN, { INTEGER, "1" } },
{ CNADJ, { INTEGER, UNKNOWN_CARDINALITY } },
{ DB, { FLAG, "0" } },
{ DBRIPID, { STRING, "1" } },
{ DBVARID, { STRING, "1" } },
{ DGVID, { STRING, "1" } },
{ DP, { INTEGER, "1" } },
{ DPADJ, { INTEGER, UNKNOWN_CARDINALITY } },
{ END, { INTEGER, "1" } },
{ EVENT, { STRING, "1" } },
{ HOMLEN, { INTEGER, UNKNOWN_CARDINALITY } },
{ HOMSEQ, { STRING, UNKNOWN_CARDINALITY } },
{ H2, { FLAG, "0" } },
{ H3, { FLAG, "0" } },
{ IMPRECISE, { FLAG, "0" } },
{ MATEID, { STRING, UNKNOWN_CARDINALITY } },
{ MEINFO, { STRING, "4" } },
{ METRANS, { STRING, "4" } },
// NOTE(review): MQ uses MISSING_VALUE as its Type instead of a concrete
// type such as FLOAT or INTEGER — confirm this is intentional for v4.4.
{ MQ, { MISSING_VALUE, "1" } },
{ MQ0, { INTEGER, "1" } },
{ NOVEL, { FLAG, "0" } },
{ NS, { INTEGER, "1" } },
{ PARID, { STRING, "1" } },
// TODO : SB metadata Type and Number is "." (SB therefore has no entry in this table yet)
{ SOMATIC, { FLAG, "0" } },
{ SVLEN, { INTEGER, UNKNOWN_CARDINALITY } },
{ SVTYPE, { STRING, "1" } },
{ VALIDATED, { FLAG, "0" } },
{ THOUSAND_G, { FLAG, "0" } }
};

const std::map<std::string, std::pair<std::string, std::string>> format_v41_v42 = {
{ AHAP, { INTEGER, "1" } },
{ CN, { INTEGER, "1" } },
Expand Down Expand Up @@ -206,6 +254,31 @@ namespace ebi
{ PS, { INTEGER, "1" } }
};

// Predefined FORMAT tags for VCF v4.4.
// Maps each tag ID to a pair of strings; by analogy with the INFO tables the
// pair is presumably { Type, Number }, where Number is a literal count or one
// of the symbolic constants A / R / G.
const std::map<std::string, std::pair<std::string, std::string>> format_v44 = {
{ AD, { INTEGER, R } },
{ ADF, { INTEGER, R } },
{ ADR, { INTEGER, R } },
{ AHAP, { INTEGER, "1" } },
{ CN, { INTEGER, "1" } },
{ CNL, { FLOAT, G } },
{ CNP, { FLOAT, G } },
{ CNQ, { FLOAT, "1" } },
{ DP, { INTEGER, "1" } },
{ EC, { INTEGER, A } },
{ FT, { STRING, "1" } },
{ GL, { FLOAT, G } },
{ GP, { FLOAT, G } },
{ GQ, { INTEGER, "1" } },
{ GT, { STRING, "1" } },
{ HAP, { INTEGER, "1" } },
{ HQ, { INTEGER, "2" } },
{ MQ, { INTEGER, "1" } },
{ NQ, { INTEGER, "1" } },
{ PL, { INTEGER, G } },
{ PQ, { INTEGER, "1" } },
{ PS, { INTEGER, "1" } }
};

const std::set<std::string> PREDEFINED_INFO_SVTYPES{
DEL,
INS,
Expand Down
1 change: 1 addition & 0 deletions inc/vcf/string_constants.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ namespace ebi
// VCF version identifier strings (v4.1 through v4.4)
const std::string VCF_V41 = "VCFv4.1";
const std::string VCF_V42 = "VCFv4.2";
const std::string VCF_V43 = "VCFv4.3";
const std::string VCF_V44 = "VCFv4.4";

// validator, debugulator and assembly_checker command line arguments
const char STDIN[] = "stdin";
Expand Down
23 changes: 23 additions & 0 deletions inc/vcf/validator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,24 @@ namespace ebi
void parse_buffer(char const * p, char const * pe, char const * eof);
};

/**
 * Parser for VCF v4.4 input.
 *
 * Inherits publicly from ParserImpl and privately from the three policy
 * classes named by the Configuration template argument.
 *
 * @tparam Configuration type providing the nested ParsePolicy, ErrorPolicy
 *                       and OptionalPolicy types
 */
template <typename Configuration>
class ParserImpl_v44 : public ParserImpl,
                       private Configuration::ParsePolicy,
                       private Configuration::ErrorPolicy,
                       private Configuration::OptionalPolicy
{
  public:
    // Expose the policy types selected by Configuration under short names
    using ParsePolicy = typename Configuration::ParsePolicy;
    using ErrorPolicy = typename Configuration::ErrorPolicy;
    using OptionalPolicy = typename Configuration::OptionalPolicy;

    /**
     * @param source shared handle to the Source to be parsed
     * @param additionalChecks extra checks to apply
     *        (NOTE(review): exact semantics are defined outside this block — confirm)
     */
    ParserImpl_v44(std::shared_ptr<Source> source, AdditionalChecks additionalChecks);

  private:
    // Handles the characters in [p, pe); eof is the end-of-input marker.
    // NOTE(review): pointer semantics presumably follow the Ragel p/pe/eof
    // convention used by the generated state machines — confirm.
    void parse_buffer(const char* p, const char* pe, const char* eof);
};

// Predefined aliases for common uses of the parser
using QuickValidator_v41 = ParserImpl_v41<QuickValidatorCfg>;
using FullValidator_v41 = ParserImpl_v41<FullValidatorCfg>;
Expand All @@ -185,6 +203,10 @@ namespace ebi
using FullValidator_v43 = ParserImpl_v43<FullValidatorCfg>;
using Reader_v43 = ParserImpl_v43<ReaderCfg>;

// VCF v4.4 parser instantiated with each of the predefined configurations
using QuickValidator_v44 = ParserImpl_v44<QuickValidatorCfg>;
using FullValidator_v44 = ParserImpl_v44<FullValidatorCfg>;
using Reader_v44 = ParserImpl_v44<ReaderCfg>;

bool is_valid_vcf_file(std::istream &input, const std::string &sourceName,
ValidationLevel validationLevel,
std::vector<std::unique_ptr<ebi::vcf::ReportWriter>> &outputs,
Expand All @@ -210,5 +232,6 @@ namespace ebi
#include "validator_detail_v41.hpp"
#include "validator_detail_v42.hpp"
#include "validator_detail_v43.hpp"
#include "validator_detail_v44.hpp"

#endif // VCF_VALIDATOR_HPP
Loading

0 comments on commit ee919cc

Please sign in to comment.