Skip to content

Commit

Permalink
refactor: use pyo3 for eliminate_subtraction
Browse files Browse the repository at this point in the history
  • Loading branch information
SwovelandM authored and igboyes committed Feb 10, 2023
1 parent 9ea0a3b commit 001d7e7
Show file tree
Hide file tree
Showing 14 changed files with 168 additions and 208 deletions.
16 changes: 8 additions & 8 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[package]
name = "virtool_expectation_maximization"
name = "rust_utils"
version = "0.1.0"
edition = "2021"
authors = ["Markus Swoveland"]
Expand Down
19 changes: 6 additions & 13 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,4 @@
FROM rust:1.60.0-slim-buster as rust
WORKDIR /build
COPY /utils/eliminate_subtraction/ /build/
RUN cargo build -r

FROM python:3.10-buster as rustExpectMax
FROM python:3.10-buster as rust_utils
WORKDIR /build
RUN apt-get update && apt-get install -y curl build-essential
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
Expand All @@ -16,23 +11,21 @@ RUN maturin build --release
FROM virtool/workflow:5.3.0 as base
WORKDIR /app
RUN pip install --upgrade pip
COPY --from=rust /build/target/release/eliminate_subtraction ./
COPY fixtures.py workflow.py pathoscope.py ./
COPY --from=rustExpectMax /build/target/wheels/virtool_expectation_maximization*.whl ./
COPY --from=rust_utils /build/target/wheels/rust_utils*.whl ./
RUN ls
RUN pip install virtool_expectation_maximization*.whl
RUN pip install rust_utils*.whl

FROM virtool/workflow:5.3.0 as test
WORKDIR /test
RUN pip install --upgrade pip
COPY pyproject.toml poetry.lock ./
RUN curl -sSL https://install.python-poetry.org | python -
COPY --from=rust /build/target/release/eliminate_subtraction ./
COPY tests /test/tests
COPY fixtures.py workflow.py pathoscope.py ./
COPY --from=rustExpectMax /build/target/wheels/virtool_expectation_maximization*.whl ./
RUN pip install virtool_expectation_maximization*.whl
COPY --from=rust_utils /build/target/wheels/rust_utils*.whl ./
RUN pip install rust_utils*.whl
RUN poetry install
RUN poetry add ./virtool_expectation_maximization*.whl
RUN poetry add ./rust_utils*.whl
RUN ls
RUN poetry run pytest
4 changes: 2 additions & 2 deletions pathoscope.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import copy
import csv
import math
import virtool_expectation_maximization
import rust_utils
from functools import cached_property
from pathlib import Path
from typing import Any, Dict, Generator, List
Expand Down Expand Up @@ -522,6 +522,6 @@ def run(sam_path: Path, reassigned_path: Path, p_score_cutoff: float):
# rewriteAlign
# computeBestHit
# + adjacent code
return virtool_expectation_maximization.run(
return rust_utils.run_expectation_maximization(
str(sam_path), str(reassigned_path), p_score_cutoff
)
151 changes: 146 additions & 5 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,157 @@
use eliminate_subtraction::{
check_should_eliminate, find_sam_align_score, parse_subtraction_sam, read_lines,
};
use pyo3::prelude::*;
use std::collections::HashMap;
use std::{
collections::{HashMap, HashSet},
fs::File,
io::Write,
};

#[pymodule]
///pyo3 interface
fn virtool_expectation_maximization(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(run, m)?)?;
/// Module initialiser for the `rust_utils` Python extension.
///
/// Registers the two `#[pyfunction]` entry points defined in this file,
/// `run_expectation_maximization` and `run_eliminate_subtraction`, on the
/// module object `m`. Any registration error is propagated to the caller.
fn rust_utils(_py: Python, m: &PyModule) -> PyResult<()> {
    m.add_function(wrap_pyfunction!(run_expectation_maximization, m)?)?;
    m.add_function(wrap_pyfunction!(run_eliminate_subtraction, m)?)?;
    Ok(())
}

#[pyfunction]
///Entry point for the virtool_expectation_maximization python module
pub fn run(
///Entry point for eliminate_subtraction
///
/// Reads the isolate SAM file at `isolate_sam_path` and copies every alignment
/// line to `output_sam_path`, except lines whose read should be eliminated
/// according to `check_should_eliminate` and the scores parsed from
/// `subtraction_sam_path`.
///
/// Lines that are empty, start with `@` or `#`, or whose third field is `*`
/// are neither written nor considered for elimination.
///
/// The ids of eliminated reads are written, one per line and in no particular
/// order, to `subtracted_read_ids.txt` in the current working directory.
///
/// If the isolate SAM file cannot be opened the function returns without
/// creating any output file.
///
/// # Panics
/// Panics if an output file cannot be created or written, or if an alignment
/// line has fewer than three tab-separated fields.
pub fn run_eliminate_subtraction(
    _py: Python,
    isolate_sam_path: String,
    subtraction_sam_path: String,
    output_sam_path: String,
) {
    let subtraction_scores = parse_subtraction_sam(&subtraction_sam_path);

    // Best-effort: an unreadable isolate file produces no output at all.
    let lines = match read_lines(isolate_sam_path) {
        Ok(lines) => lines,
        Err(_) => return,
    };

    // Buffer the output; SAM files can hold a very large number of lines and an
    // unbuffered `File` issues one write per line.
    let mut sam_file = std::io::BufWriter::new(File::create(output_sam_path).unwrap());
    let mut subtracted_read_ids: HashSet<String> = HashSet::new();

    for line in lines {
        let l = match line {
            Ok(l) => l,
            // A line that is not valid UTF-8 is skipped, as before.
            Err(err) if err.kind() == std::io::ErrorKind::InvalidData => continue,
            // Any other read error is treated as end of input. Retrying would
            // yield the same error again and the loop would never finish.
            Err(_) => break,
        };

        // Skip blank lines and SAM header / comment lines.
        if l.is_empty() || l.starts_with('@') || l.starts_with('#') {
            continue;
        }

        let fields: Vec<&str> = l.split('\t').collect();

        // `*` in the third field means there is no reference to subtract against.
        if fields[2] == "*" {
            continue;
        }

        let score = find_sam_align_score(&fields);

        if check_should_eliminate(&subtraction_scores, fields[0], score) {
            subtracted_read_ids.insert(fields[0].to_string());
        } else {
            writeln!(sam_file, "{}", l).unwrap();
        }
    }

    // Flush explicitly so a failed write is reported instead of being dropped.
    sam_file.flush().unwrap();

    let mut subtracted_read_ids_file =
        std::io::BufWriter::new(File::create("subtracted_read_ids.txt").unwrap());

    for read_id in subtracted_read_ids {
        writeln!(subtracted_read_ids_file, "{}", read_id).unwrap();
    }

    subtracted_read_ids_file.flush().unwrap();
}

mod eliminate_subtraction {
    //! SAM helpers used to decide which isolate alignments should be dropped
    //! because the same read also appears in a subtraction alignment.

    use std::{
        collections::HashMap,
        fs::File,
        io::{self, BufRead},
        path::Path,
    };

    /// Decide whether a read should be eliminated.
    ///
    /// Returns `true` when `read_id` has an entry in `subtraction_scores` and
    /// that subtraction score is greater than or equal to the isolate `score`.
    /// Reads without a subtraction entry are never eliminated.
    pub fn check_should_eliminate(
        subtraction_scores: &HashMap<String, f32>,
        read_id: &str,
        score: f32,
    ) -> bool {
        subtraction_scores
            .get(read_id)
            .map_or(false, |&subtraction_score| subtraction_score >= score)
    }

    /// Find the Pathoscope alignment score for a SAM line.
    ///
    /// The score is the value of the first field starting with `AS:i:` (or `0`
    /// when no such field exists) plus the number of characters in field 9.
    ///
    /// # Arguments
    /// * `fields` - The tab-separated SAM fields of one alignment line.
    ///
    /// # Panics
    /// Panics if `fields` has fewer than ten entries or if the text after
    /// `AS:i:` cannot be parsed as a number.
    pub fn find_sam_align_score(fields: &[&str]) -> f32 {
        let read_length = fields[9].chars().count() as f32;

        let a_score = fields
            .iter()
            .find_map(|field| field.strip_prefix("AS:i:"))
            .map_or(0.0, |raw| {
                raw.parse::<f32>()
                    .expect("AS:i: field should hold a numeric alignment score")
            });

        a_score + read_length
    }

    /// Parse a subtraction SAM file into a map of read id to alignment score.
    ///
    /// Lines that are empty, start with `@` or `#`, or whose third field is `*`
    /// are skipped. If the file cannot be opened an empty map is returned.
    ///
    /// # Panics
    /// Panics if an alignment line has fewer than three tab-separated fields,
    /// and in the cases documented for `find_sam_align_score`.
    pub fn parse_subtraction_sam(path: &str) -> HashMap<String, f32> {
        let mut high_scores: HashMap<String, f32> = HashMap::new();

        // Best-effort: an unreadable subtraction file means nothing is eliminated.
        let lines = match read_lines(path) {
            Ok(lines) => lines,
            Err(_) => return high_scores,
        };

        for line in lines {
            let l = match line {
                Ok(l) => l,
                // A line that is not valid UTF-8 is skipped, as before.
                Err(err) if err.kind() == io::ErrorKind::InvalidData => continue,
                // Any other read error is treated as end of input. Retrying
                // would yield the same error again and never terminate.
                Err(_) => break,
            };

            // Skip blank lines and SAM header / comment lines.
            if l.is_empty() || l.starts_with('@') || l.starts_with('#') {
                continue;
            }

            let fields: Vec<&str> = l.split('\t').collect();

            if fields[2] == "*" {
                continue;
            }

            let score = find_sam_align_score(&fields);
            // NOTE(review): a later line for the same read id overwrites the
            // earlier score even when it is lower. Confirm that is intended
            // given the `high_scores` name.
            high_scores.insert(fields[0].to_string(), score);
        }

        high_scores
    }

    /// Open `filename` and return an iterator over its lines.
    ///
    /// # Errors
    /// Returns the underlying I/O error if the file cannot be opened.
    pub fn read_lines<P>(filename: P) -> io::Result<io::Lines<io::BufReader<File>>>
    where
        P: AsRef<Path>,
    {
        File::open(filename).map(|file| io::BufReader::new(file).lines())
    }
}

#[pyfunction]
///Entry point for expectation_maximization
pub fn run_expectation_maximization(
_py: Python,
sam_path: String,
reassigned_path: String,
Expand Down
7 changes: 0 additions & 7 deletions utils/eliminate_subtraction/Cargo.lock

This file was deleted.

8 changes: 0 additions & 8 deletions utils/eliminate_subtraction/Cargo.toml

This file was deleted.

Empty file.
6 changes: 0 additions & 6 deletions utils/eliminate_subtraction/output.sam

This file was deleted.

Loading

0 comments on commit 001d7e7

Please sign in to comment.