Skip to content

Commit

Permalink
call: new ploidy handling
Browse files Browse the repository at this point in the history
  • Loading branch information
pd3 authored and mcshane committed Jul 29, 2015
1 parent c4a53ac commit 21e745a
Show file tree
Hide file tree
Showing 4 changed files with 329 additions and 160 deletions.
47 changes: 45 additions & 2 deletions doc/bcftools.txt
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,29 @@ specific commands to see if they apply.
File of sample names to include or exclude if prefixed with "^".
One sample per line.
The command *<<call,bcftools call>>* accepts an optional second
column indicating ploidy (0, 1 or 2) and can parse also PED files.
With *<<call,bcftools call>> -C* 'trio', PED file is expected.
column indicating ploidy (0, 1 or 2) or sex (as defined by
*<<ploidy,--ploidy>>*, for example "F" or "M"), and can parse also PED
files. If the second column is not present,
the sex "F" is assumed.
With *<<call,bcftools call>> -C* 'trio', PED file is expected. File
formats examples:
----
sample1 1
sample2 2
sample3 2

or

sample1 M
sample2 F
sample3 F

or a .ped file (here is shown a minimum working example, the first column is
ignored and the last indicates sex: 1=male, 2=female)

ignored daughterA fatherA motherA 2
ignored sonB fatherB motherB 1
----

*-t, --targets* \[&#94;]'chr'|'chr:pos'|'chr:from-to'|'chr:from-'[,...]::
Similar as *-r, --regions*, but the next position is accessed by streaming the
Expand Down Expand Up @@ -370,6 +391,28 @@ demand. The original calling model can be invoked with the *-c* option.
*-O, --output-type* 'b'|'u'|'z'|'v'::
see *<<common_options,Common Options>>*

*--ploidy* 'ASSEMBLY'['?'][[ploidy]]::
predefined ploidy, use 'list' (or any other unused word) to print a list
of all predefined assemblies. Append a question mark to print the actual
definition. See also *--ploidy-file*.

*--ploidy-file* 'FILE'::
ploidy definition given as a space/tab-delimited list of
CHROM, FROM, TO, SEX, PLOIDY. The SEX codes are arbitrary and
correspond to the ones used by *<<samples_file,--samples-file>>*.
The default ploidy can be given using the starred records (see
below), unlisted regions have ploidy 2. The default ploidy definition is
----
X 1 60000 M 1
X 2699521 154931043 M 1
Y 1 59373566 M 1
Y 1 59373566 F 0
MT 1 16569 M 1
MT 1 16569 F 1
* * * M 2
* * * F 2
----

*-r, --regions* 'chr'|'chr:pos'|'chr:from-to'|'chr:from-'[,...]::
see *<<common_options,Common Options>>*

Expand Down
22 changes: 21 additions & 1 deletion ploidy.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,20 @@ struct _ploidy_t
{
int nsex, msex; // number of genders, m:number of allocated elements in id2sex array
int dflt, min, max; // ploidy: default, min and max (only explicitly listed)
int *sex2dflt;
regidx_t *idx;
void *sex2id;
char **id2sex;
kstring_t tmp_str;
};

typedef struct
{
int sex, ploidy;
}
sex_ploidy_t;


regidx_t *ploidy_regions(ploidy_t *ploidy)
{
return ploidy->idx;
Expand Down Expand Up @@ -78,6 +86,8 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
hts_expand0(char*,ploidy->nsex,ploidy->msex,ploidy->id2sex);
ploidy->id2sex[ploidy->nsex-1] = strdup(ploidy->tmp_str.s);
sp->sex = khash_str2int_inc(ploidy->sex2id, ploidy->id2sex[ploidy->nsex-1]);
ploidy->sex2dflt = (int*) realloc(ploidy->sex2dflt,sizeof(int)*ploidy->nsex);
ploidy->sex2dflt[ploidy->nsex-1] = ploidy->dflt;
}

ss = se;
Expand All @@ -88,6 +98,15 @@ int ploidy_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, v
if ( sp->ploidy < ploidy->min ) ploidy->min = sp->ploidy;
if ( sp->ploidy > ploidy->max ) ploidy->max = sp->ploidy;

// Special case, chr="*" stands for a default value
ss = (char*) line;
while ( *ss && isspace(*ss) ) ss++;
if ( ss[0]=='*' && (!ss[1] || isspace(ss[1])) )
{
ploidy->sex2dflt[ploidy->nsex-1] = sp->ploidy;
return -1;
}

return 0;
}

Expand Down Expand Up @@ -141,6 +160,7 @@ void ploidy_destroy(ploidy_t *ploidy)
if ( ploidy->idx ) regidx_destroy(ploidy->idx);
free(ploidy->id2sex);
free(ploidy->tmp_str.s);
free(ploidy->sex2dflt);
free(ploidy);
}

Expand All @@ -157,7 +177,7 @@ int ploidy_query(ploidy_t *ploidy, char *seq, int pos, int *sex2ploidy, int *min
if ( min ) *min = ploidy->dflt;
if ( max ) *max = ploidy->dflt;
if ( sex2ploidy )
for (i=0; i<ploidy->nsex; i++) sex2ploidy[i] = ploidy->dflt;
for (i=0; i<ploidy->nsex; i++) sex2ploidy[i] = ploidy->sex2dflt[i];
return 0;
}

Expand Down
14 changes: 5 additions & 9 deletions ploidy.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,16 @@
ploidy_destroy(pld);
An example of ploidy file format follows. The coordinates are 1-based and
inclusive:
inclusive. The "*" records define the default ploidy for each sex. If not
present, the default_ploidy passed to ploidy_init is used instead:
X 1 60000 M 1
X 2699521 154931043 M 1
Y 1 59373566 M 1
Y 1 59373566 F 0
MT 1 16569 M 1
MT 1 16569 F 1
* * * M 2
* * * F 2
*/

#ifndef __PLOIDY_H__
Expand All @@ -56,17 +59,10 @@

typedef struct _ploidy_t ploidy_t;

typedef struct
{
int sex, ploidy;
}
sex_ploidy_t;


/*
* ploidy_init()
* @param fname: input file name or NULL if default ploidy from example above should be used
* @param dflt: default ploidy to use for unlisted regions
* @param dflt: default ploidy to use for unlisted regions (the '* * *' records have precedence).
*
* Returns new structure on success or NULL on error.
*/
Expand Down
Loading

0 comments on commit 21e745a

Please sign in to comment.