Skip to content

Commit

Permalink
feat: handle case of BAM without RG tag (#100)
Browse files Browse the repository at this point in the history
* feat: handle case of BAM without RG tag

* chore(instrument): add comment explaining `KeyError`
  • Loading branch information
a-frantz authored Jun 26, 2023
1 parent ff1a14e commit 4436abf
Showing 1 changed file with 27 additions and 15 deletions.
42 changes: 27 additions & 15 deletions ngsderive/commands/instrument.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,22 +227,34 @@ def main(ngsfiles, outfile=sys.stdout, n_samples=10000):
malformed_read_names = False

# accumulate instrument and flowcell IDs
for read in itertools.islice(ngsfile, n_samples):
parts = read["query_name"].split(":")
if len(parts) != 7: # not Illumina format
malformed_read_names = True
iid = parts[0] # attempt to recover machine name
try:
for read in itertools.islice(ngsfile, n_samples):
parts = read["query_name"].split(":")
if len(parts) != 7: # not Illumina format
malformed_read_names = True
iid = parts[0] # attempt to recover machine name
instruments.add(iid)
for rg in ngsfile.handle.header.to_dict()["RG"]:
if rg["ID"] == read["read_group"]:
if "PU" in rg:
flowcells.add(rg["PU"])
if "PM" in rg:
instruments.add(rg["PM"])
continue
iid, fcid = parts[0], parts[2]
instruments.add(iid)
for rg in ngsfile.handle.header.to_dict()["RG"]:
if rg["ID"] == read["read_group"]:
if "PU" in rg:
flowcells.add(rg["PU"])
if "PM" in rg:
instruments.add(rg["PM"])
continue
iid, fcid = parts[0], parts[2]
instruments.add(iid)
flowcells.add(fcid)
flowcells.add(fcid)
except KeyError: # no RG tag is present
result = {
"File": ngsfilepath,
"Instrument": "unknown",
"Confidence": "no confidence",
"Basis": "no RG tag present",
}
writer.writerow(result)
outfile.flush()
continue

if malformed_read_names:
logger.warning(
"Encountered read names not in Illumina format. Recovery attempted."
Expand Down

0 comments on commit 4436abf

Please sign in to comment.