Skip to content

Commit

Permalink
review fixups
Browse files Browse the repository at this point in the history
  • Loading branch information
nh13 committed Nov 17, 2022
1 parent b640792 commit 7542c8d
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 49 deletions.
122 changes: 83 additions & 39 deletions src/lib/opts.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#![forbid(unsafe_code)]

use std::{num::NonZeroUsize, path::PathBuf, str::FromStr, vec::Vec};
use std::{cmp::Ordering, num::NonZeroUsize, path::PathBuf, str::FromStr, vec::Vec};

use anyhow::{anyhow, Result};
use anyhow::{anyhow, bail, Result};
use clap::Parser;
use env_logger::Env;
use read_structure::{ReadSegment, ReadStructure, SegmentType};
Expand Down Expand Up @@ -87,7 +87,7 @@ pub struct Opts {
pub sample_metadata: PathBuf,

/// Read structures, one per input FASTQ. Do not provide when using a path prefix for FASTQs.
#[clap(long, short = 'r', display_order = 3, required = true, multiple_values = true)]
#[clap(long, short = 'r', display_order = 3, multiple_values = true)]
pub read_structures: Vec<ReadStructure>,

/// The directory to write outputs to.
Expand Down Expand Up @@ -282,27 +282,36 @@ impl Opts {
let read_length: usize = infer_fastq_sequence_length(&fastq)?;
let fixed_length: usize =
read_structure.iter().map(|s| s.length().unwrap_or(0)).sum();

let read_segments: Vec<ReadSegment> = read_structure
.iter()
.filter_map(|s| {
if s.has_length() {
Some(*s)
} else if fixed_length == read_length {
None
} else {
Some(
ReadSegment::from_str(&format!(
"{}{}",
read_length - fixed_length,
s.kind.value()
))
.unwrap(),
)
}
})
.collect();
read_structures.push(ReadStructure::new(read_segments)?);
match fixed_length.cmp(&read_length) {
Ordering::Greater => bail!(
"Read length ({}) is too short ({}) for the read structure {} in {:?}",
read_length,
fixed_length,
read_structure,
fastq
),
Ordering::Equal => bail!("Variable length sample barcode would be zero (read length: {}, sum of fixed segment lengths: {}) in FASTQ: {:?}",
read_length, fixed_length, fastq
),
Ordering::Less => {
let read_segments: Vec<ReadSegment> = read_structure
.iter()
.map(|s| {
if s.has_length() {
*s
} else {
ReadSegment::from_str(&format!(
"{}{}",
read_length - fixed_length,
s.kind.value()
))
.unwrap()
}
})
.collect();
read_structures.push(ReadStructure::new(read_segments)?);
}
}
}
}
Ok(Opts { read_structures, ..self })
Expand Down Expand Up @@ -376,8 +385,20 @@ mod test {
expected: ReadStructure,
}

impl FastqDef {
    /// Writes a one-record FASTQ to `self.fastq` through a `BgzfSyncWriter`.
    ///
    /// The record is named `NAME` and contains `self.read_length` `A` bases, each paired with
    /// the quality character `I`.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be created, written, or flushed.
    fn write(&self) {
        let len = self.read_length;
        let record = format!("@NAME\n{}\n+\n{}\n", "A".repeat(len), "I".repeat(len));

        let file = File::create(&self.fastq).unwrap();
        let mut bgzf = BgzfSyncWriter::new(BufWriter::new(file), Compression::new(3));
        bgzf.write_all(record.as_bytes()).unwrap();
        bgzf.flush().unwrap();
    }
}

#[test]
fn test_with_fixed_sample_barcodes() {
fn test_with_fixed_sample_barcodes_ok() {
let dir = tempdir().unwrap();

let fastq_defs = vec![
Expand Down Expand Up @@ -409,24 +430,11 @@ mod test {
read_length: 30,
expected: ReadStructure::from_str("8B10S12B").unwrap(),
},
// updated: read length is short enough that variable length sample barcode is removed
FastqDef {
read_structure: ReadStructure::from_str("8B10S+B").unwrap(),
fastq: dir.path().join("5.fastq.gz"),
read_length: 18,
expected: ReadStructure::from_str("8B10S").unwrap(),
},
];

// Create output FASTQ files
for fastq_def in &fastq_defs {
let writer = BufWriter::new(File::create(&fastq_def.fastq).unwrap());
let mut gz_writer = BgzfSyncWriter::new(writer, Compression::new(3));
let bases = String::from_utf8(vec![b'A'; fastq_def.read_length]).unwrap();
let quals = String::from_utf8(vec![b'I'; fastq_def.read_length]).unwrap();
let data = format!("@NAME\n{}\n+\n{}\n", bases, quals);
gz_writer.write_all(data.as_bytes()).unwrap();
gz_writer.flush().unwrap();
fastq_def.write();
}

// Create the opt, and update it
Expand All @@ -446,4 +454,40 @@ mod test {
assert_eq!(actual.to_string(), expected.to_string());
}
}

/// Read lengths that leave no bases for the variable-length sample barcode, or that are shorter
/// than the fixed-length segments, must make `with_fixed_sample_barcodes` return an error.
#[test]
fn test_with_fixed_sample_barcodes_error() {
    let dir = tempdir().unwrap();

    // Every case uses the read structure `8B10S+B` (18 fixed bases) and the same FASTQ path;
    // the file is rewritten immediately before each case is checked.
    //   18: the variable length sample barcode would be zero bases long
    //   17: the read is shorter than the leading fixed-length segments
    let failing_read_lengths: [usize; 2] = [18, 17];
    let fastq_defs: Vec<FastqDef> = failing_read_lengths
        .iter()
        .map(|&read_length| FastqDef {
            read_structure: ReadStructure::from_str("8B10S+B").unwrap(),
            fastq: dir.path().join("5.fastq.gz"),
            read_length,
            expected: ReadStructure::from_str("+B").unwrap(), // never compared
        })
        .collect();

    for fastq_def in &fastq_defs {
        // Create the input FASTQ for this case
        fastq_def.write();

        // Build the opts, then attempt to resolve the variable-length segment
        let opts = Opts {
            read_structures: vec![fastq_def.read_structure.clone()],
            fastqs: vec![fastq_def.fastq.clone()],
            ..Opts::default()
        };
        assert!(opts.with_fixed_sample_barcodes().is_err());
    }
}
}
14 changes: 13 additions & 1 deletion src/lib/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use crate::{
pooled_sample_writer::PooledSampleWriter,
sample_sheet::{self},
thread_reader::ThreadReader,
utils::{check_bgzf, filenames, InputFastq, MultiZip},
utils::{check_bgzf, filenames, segment_type_to_fastq_kind, InputFastq, MultiZip},
};

/// Run demultiplexing.
Expand Down Expand Up @@ -57,6 +57,18 @@ pub fn run(opts: Opts) -> Result<()> {
})
.collect();

// Should only find one FASTQ with the same kind and kind number
for ((kind, kind_number), fastqs) in
&input_fastqs.iter().group_by(|f| (f.kind, f.kind_number))
{
ensure!(
fastqs.count() == 1,
"Found multiple FASTQS with the same kind and kind number ({}{})",
segment_type_to_fastq_kind(&kind),
kind_number
);
}

// build read structures, one per input FASTQ
let read_structures: Vec<ReadStructure> =
input_fastqs.iter().map(InputFastq::read_structure).collect();
Expand Down
17 changes: 8 additions & 9 deletions src/lib/sample_sheet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ impl SampleSheet {

#[cfg(test)]
mod test {
use std::path::PathBuf;
use std::path::{Path, PathBuf};

use crate::opts::Opts;
use crate::sample_sheet::{SampleSheet, SampleSheetError};
Expand Down Expand Up @@ -479,13 +479,14 @@ mod test {

#[test]
fn test_demux_missing_fastqs_and_read_structures() {
// It's ok that we do not have any read structures, as it may be a path prefix.
let records: Vec<StringRecord> = vec![StringRecord::from(vec!["unkonwn"])];

let result = SampleSheet::parse_and_update_demux_options(&records, Opts::default());
assert_matches!(result, Err(SampleSheetError::DemuxOptionsParsing { kind: _, args: _ }));
if let Err(SampleSheetError::DemuxOptionsParsing { kind, args }) = result {
assert_eq!(kind, MissingRequiredArgument.as_str().unwrap());
assert_eq!(args, "--fastqs, --read-structures".to_string());
assert_eq!(args, "--fastqs".to_string());
}
}

Expand All @@ -503,14 +504,12 @@ mod test {

#[test]
fn test_demux_missing_read_structure() {
// It's ok that we do not have any read structures, as it may be a path prefix.
let fastqs = vec![Path::new("/dev/null")];
let records: Vec<StringRecord> = vec![StringRecord::from(vec!["fastqs", "/dev/null"])];

let result = SampleSheet::parse_and_update_demux_options(&records, Opts::default());
assert_matches!(result, Err(SampleSheetError::DemuxOptionsParsing { kind: _, args: _ }));
if let Err(SampleSheetError::DemuxOptionsParsing { kind, args }) = result {
assert_eq!(kind, MissingRequiredArgument.as_str().unwrap());
assert_eq!(args, "--read-structures".to_string());
}
let opts = SampleSheet::parse_and_update_demux_options(&records, Opts::default()).unwrap();
assert_eq!(opts.fastqs, fastqs);
assert!(opts.read_structures.is_empty());
}

#[test]
Expand Down
14 changes: 14 additions & 0 deletions src/lib/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,20 @@ lazy_static! {
static ref INPUT_FASTQ_REGEX: Regex = Regex::new(r"^(.*)_L00(\d)_([RIUS])(\d)_001.fastq.gz$").unwrap();
}

/// Returns the character that represents the given `SegmentType` in a FASTQ file name.
///
/// Template maps to `R`, skip to `S`, molecular barcode to `U`, and sample barcode to `I`.
///
/// # Panics
///
/// Panics if `segment_type` is any variant other than the four listed above.
pub fn segment_type_to_fastq_kind(segment_type: &SegmentType) -> char {
    match segment_type {
        SegmentType::Template => 'R',
        SegmentType::Skip => 'S',
        SegmentType::MolecularBarcode => 'U',
        SegmentType::SampleBarcode => 'I',
        unexpected => panic!(
            "Unreachable - segment_type should enforce only [TSMB], found {}",
            unexpected.value()
        ),
    }
}

/// Contains information about a FASTQ that has been inferred from the file name. This includes
/// lane, segment type (e.g. template/read, sample barcode/index, molecular barcode/index, and
/// skip bases), and segment number (e.g. R1 or R2).
Expand Down

0 comments on commit 7542c8d

Please sign in to comment.