Skip to content

Commit

Permalink
Allow --mark-ins and --mark-snv with a character, similarly to `-…
Browse files Browse the repository at this point in the history
…-mark-del`
  • Loading branch information
pd3 committed Apr 7, 2023
1 parent ff15b79 commit 22cfb6e
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 17 deletions.
2 changes: 2 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ Changes affecting specific commands:

- Support higher-ploidy genotypes with `-H, --haplotype` (#1892)

- Allow `--mark-ins` and `--mark-snv` with a character, similarly to `--mark-del`

* bcftools merge

- New `-M, --missing-rules` option to control the behavior of merging of vector tags
Expand Down
40 changes: 27 additions & 13 deletions consensus.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@
#define PICK_SHORT 8
#define PICK_IUPAC 16

#define TO_UPPER 0
#define TO_LOWER 1
#define TO_UPPER 1
#define TO_LOWER 2

typedef struct
{
Expand Down Expand Up @@ -466,25 +466,37 @@ static char *mark_del(char *ref, int rlen, char *alt, int mark)
static void mark_ins(char *ref, char *alt, char mark)
{
int i, nref = strlen(ref), nalt = strlen(alt);
if ( mark=='l' )
if ( mark==TO_LOWER )
for (i=nref; i<nalt; i++) alt[i] = tolower(alt[i]);
else
else if ( mark==TO_UPPER )
for (i=nref; i<nalt; i++) alt[i] = toupper(alt[i]);
else if ( mark )
for (i=nref; i<nalt; i++) alt[i] = mark;
}
static void mark_snv(char *ref, char *alt, char mark)
{
int i, nref = strlen(ref), nalt = strlen(alt);
int n = nref < nalt ? nref : nalt;
if ( mark=='l' )
if ( mark==TO_LOWER )
{
for (i=0; i<n; i++)
if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = tolower(alt[i]);
}
else
else if ( mark==TO_UPPER)
{
for (i=0; i<n; i++)
if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = toupper(alt[i]);
}
else if ( mark==TO_UPPER)
{
for (i=0; i<n; i++)
if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = toupper(alt[i]);
}
else if ( mark )
{
for (i=0; i<n; i++)
if ( tolower(ref[i])!=tolower(alt[i]) ) alt[i] = mark;
}
}
static void iupac_init(args_t *args, bcf1_t *rec)
{
Expand Down Expand Up @@ -1108,9 +1120,9 @@ static void usage(args_t *args)
fprintf(stderr, " NpIu: index of the allele for phased and IUPAC code for unphased GTs (e.g. \"2pIu\")\n");
fprintf(stderr, " -i, --include EXPR Select sites for which the expression is true (see man page for details)\n");
fprintf(stderr, " -I, --iupac-codes Output IUPAC codes based on FORMAT/GT, use -s/-S to subset samples\n");
fprintf(stderr, " --mark-del CHAR Instead of removing sequence, insert CHAR for deletions\n");
fprintf(stderr, " --mark-ins uc|lc Highlight insertions in uppercase (uc) or lowercase (lc), leaving the rest as is\n");
fprintf(stderr, " --mark-snv uc|lc Highlight substitutions in uppercase (uc) or lowercase (lc), leaving the rest as is\n");
fprintf(stderr, " --mark-del CHAR Instead of removing sequence, insert character CHAR for deletions\n");
fprintf(stderr, " --mark-ins uc|lc|CHAR Highlight insertions in uppercase (uc), lowercase (lc), or use CHAR, leaving the rest as is\n");
fprintf(stderr, " --mark-snv uc|lc|CHAR Highlight substitutions in uppercase (uc), lowercase (lc), or use CHAR, leaving the rest as is\n");
fprintf(stderr, " -m, --mask FILE Replace regions according to the next --mask-with option. The default is --mask-with N\n");
fprintf(stderr, " --mask-with CHAR|uc|lc Replace with CHAR (skips overlapping variants); change to uppercase (uc) or lowercase (lc)\n");
fprintf(stderr, " -M, --missing CHAR Output CHAR instead of skipping a missing genotype \"./.\"\n");
Expand Down Expand Up @@ -1162,13 +1174,15 @@ int main_consensus(int argc, char *argv[])
{
case 1 : args->mark_del = optarg[0]; break;
case 2 :
if ( !strcasecmp(optarg,"uc") ) args->mark_ins = 'u';
else if ( !strcasecmp(optarg,"lc") ) args->mark_ins = 'l';
if ( !strcasecmp(optarg,"uc") ) args->mark_ins = TO_UPPER;
else if ( !strcasecmp(optarg,"lc") ) args->mark_ins = TO_LOWER;
else if ( !optarg[1] && optarg[0]>32 && optarg[0]<127 ) args->mark_ins = optarg[0];
else error("The argument is not recognised: --mark-ins %s\n",optarg);
break;
case 3 :
if ( !strcasecmp(optarg,"uc") ) args->mark_snv = 'u';
else if ( !strcasecmp(optarg,"lc") ) args->mark_snv = 'l';
if ( !strcasecmp(optarg,"uc") ) args->mark_snv = TO_UPPER;
else if ( !strcasecmp(optarg,"lc") ) args->mark_snv = TO_LOWER;
else if ( !optarg[1] && optarg[0]>32 && optarg[0]<127 ) args->mark_snv = optarg[0];
else error("The argument is not recognised: --mark-snv %s\n",optarg);
break;
case 'p': args->chr_prefix = optarg; break;
Expand Down
8 changes: 4 additions & 4 deletions doc/bcftools.txt
Original file line number Diff line number Diff line change
Expand Up @@ -957,13 +957,13 @@ Note that the *-H, --haplotype* option requires the *-s, --samples* option, unle
from REF,ALT columns and sample genotypes were not considered.

*--mark-del* 'CHAR'::
instead of removing sequence, insert CHAR for deletions
instead of removing sequence, insert character CHAR for deletions

*--mark-ins* 'uc'|'lc'::
highlight inserted sequence in uppercase (uc) or lowercase (lc), leaving the rest of the sequence as is
*--mark-ins* 'uc'|'lc'|'CHAR'::
highlight inserted sequence in uppercase (uc), lowercase (lc), or a provided character CHAR, leaving the rest of the sequence as is

*--mark-snv* 'uc'|'lc'|'CHAR'::
highlight substitutions in uppercase (uc) or lowercase (lc), leaving the rest of the sequence as is
highlight substitutions in uppercase (uc), lowercase (lc), or a provided character CHAR, leaving the rest of the sequence as is

*-m, --mask* 'FILE'::
BED file or TAB file with regions to be replaced with N (the default) or as specified by
Expand Down
20 changes: 20 additions & 0 deletions test/consensus.19.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
>1:2-501
TAC:A:AT:Tga::t+++AT:AaAAAGAACATAACCTACGTATCAACTAAAGTGGTTGTT
TG:AGAAAAGGAAGACTTAAAAAGAGTCAGTACTAACCTACATAATATATACAATGTTCA
TTAAATAATAAAATGAGCTCATCATACTTAGGTCATCATAAATATATCTGAAATTCACAA
ATATTGATCAAATGGTAAAATAGACAAGTAGATTTTAATAGGTTAAACAATTACTGATTC
TCTTGAAAGAATAAATTTAATATGAGACCTATTTCATTATAATGAACTCACAAATTAGAA
ACTTCACACTGGGGGCTGGAGAGATGGCTCAGTAGTTAAGAACACTGACTGCTCTTCTGA
AGGTCCTGAGTTCAAATCCCAGCAACCACATGGTGACTTACAACCATCTGTAATGACATC
TGATGCCCTCTGGTGTGTCTGAAGACAGCTACAGTGTACTTACATAAAATAATAAATAAA
TCTTTAAAAACAAAAAAAAAGAA
>2
gaagatcttttccttattaaggatctgaagctctgtagatttgtattctattaaacatgg
A:::attagtgattttccatattctttaagtcattttagagtaatgtgttcttaagat::
:tcagaaaaacaaaaacttgtgctttcctgtttgaaaaacaaacagctgtggggaatgG+
+++++++tgtcgggacagcctttttatA----------aaataatgttgaggctttgata
cgtcaaagttatatttcaaatggaatcacttagacctcgtttctgagtgtcaatggccat
attggggAtttgctgctgccaatgacaGcacaccctgggaatgccccaactacttactac
aaagcagtgttacatggagaagatcttcaagagtctttttgctagatctttccttggctt
ttgatgtgactcctctcaataaaatccacagtaatatagtgagtggtctcctgctccaaa
ccagtatt:cagacacagttaatccagac
1 change: 1 addition & 0 deletions test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,7 @@
run_test(\&test_vcf_consensus,$opts,in=>'consensus.13',out=>'consensus.13.out',fa=>'consensus.13.fa',args=>'-s -');
run_test(\&test_vcf_consensus,$opts,in=>'consensus.14',out=>'consensus.14.out',fa=>'consensus.14.fa',args=>'-s -');
run_test(\&test_vcf_consensus,$opts,in=>'consensus.12',out=>'consensus.15.out',fa=>'consensus.12.fa',args=>'-s - --mark-del - --mark-ins uc --mark-snv uc');
run_test(\&test_vcf_consensus,$opts,in=>'consensus.12',out=>'consensus.19.out',fa=>'consensus.12.fa',args=>'-s - --mark-del - --mark-ins + --mark-snv :');
run_test(\&test_vcf_consensus,$opts,in=>'consensus.15',out=>'consensus.17.out',fa=>'consensus.15.fa',args=>'-H I --mark-ins lc --mark-snv lc');
run_test(\&test_vcf_consensus,$opts,in=>'consensus.16',out=>'consensus.18.out',fa=>'consensus.fa',args=>'-s - -I');
run_test(\&test_vcf_consensus,$opts,in=>'consensus.16',out=>'consensus.18.out',fa=>'consensus.fa',args=>'-H I');
Expand Down

0 comments on commit 22cfb6e

Please sign in to comment.