Skip to content

Commit

Permalink
Merge pull request #100 from ewallace/nonunique_multimap_options
Browse files Browse the repository at this point in the history
New nonunique options for htseq-count: fraction and random.
  • Loading branch information
iosonofabio authored Apr 11, 2020
2 parents d993000 + 77f7c78 commit c4b8e3e
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 7 deletions.
14 changes: 11 additions & 3 deletions python2/HTSeq/scripts/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import warnings
import traceback
import os.path
import random

import HTSeq

Expand Down Expand Up @@ -298,6 +299,12 @@ def write_to_samout(r, assignment, samoutfile):
elif multimapped_mode == 'all':
for fsi in list(fs):
counts[fsi] += 1
elif multimapped_mode == 'fraction':
for fsi in list(fs):
counts[fsi] += 1.0 / len(fs)
elif multimapped_mode == 'random':
fsi = random.choice(fs)
counts[fsi] += 1
else:
sys.exit("Illegal multimap mode.")

Expand Down Expand Up @@ -432,9 +439,10 @@ def main():

pa.add_argument(
"--nonunique", dest="nonunique", type=str,
choices=("none", "all"), default="none",
help="Whether to score reads that are not uniquely aligned " +
"or ambiguously assigned to features")
choices=("none", "all", "fraction", "random"), default="none",
help="Whether and how to score reads that are not uniquely aligned " +
"or ambiguously assigned to features " +
"(choices: none, all, fraction, random; default: none)")

pa.add_argument(
"--secondary-alignments", dest="secondary_alignments", type=str,
Expand Down
14 changes: 11 additions & 3 deletions python3/HTSeq/scripts/count.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import os.path
import multiprocessing
import pysam
import random

import HTSeq

Expand Down Expand Up @@ -277,6 +278,12 @@ def write_to_samout(r, assignment, samoutfile, template=None):
elif multimapped_mode == 'all':
for fsi in list(fs):
counts[fsi] += 1
elif multimapped_mode == 'fraction':
for fsi in list(fs):
counts[fsi] += 1.0 / len(fs)
elif multimapped_mode == 'random':
fsi = random.choice(fs)
counts[fsi] += 1
else:
sys.exit("Illegal multimap mode.")

Expand Down Expand Up @@ -596,9 +603,10 @@ def main():

pa.add_argument(
"--nonunique", dest="nonunique", type=str,
choices=("none", "all"), default="none",
help="Whether to score reads that are not uniquely aligned " +
"or ambiguously assigned to features")
choices=("none", "all", "fraction", "random"), default="none",
help="Whether and how to score reads that are not uniquely aligned " +
"or ambiguously assigned to features " +
"(choices: none, all, fraction, random; default: none)")

pa.add_argument(
"--secondary-alignments", dest="secondary_alignments", type=str,
Expand Down
12 changes: 11 additions & 1 deletion python3/doc/count.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,19 @@ the ``--nonunique`` option:
read (or read pair) aligns to more than one location in the reference, it is
scored as ``alignment_not_unique`` and also separately for each location.

* ``--nonunique fraction``: the read (or read pair) is counted as ``ambiguous``
and is also counted fractionally in all features to which it was assigned. For
example, if the read overlaps with 3 features, it adds 1/3 to the count of each of them.

* ``--nonunique random``: the read (or read pair) is counted as ``ambiguous``
and is also counted once in exactly one of the features to which it was
assigned, chosen uniformly at random.

Notice that when using ``--nonunique all`` the sum of all counts will not
be equal to the number of reads (or read pairs), because those with multiple
alignments or overlaps get scored multiple times.
alignments or overlaps get scored multiple times. By contrast, with
``--nonunique fraction`` or ``--nonunique random``, the sum of all counts
will be equal to the number of reads (or read pairs).

The following figure illustrates the effect of these three modes and the
``--nonunique`` option:
Expand Down

0 comments on commit c4b8e3e

Please sign in to comment.