
Commit

Restructuring code directories to conform to nimble standard, adding nimble install option, renamed functions to conform with fasta/q migration
Nathan Roach committed Nov 20, 2020
1 parent 51a1df1 commit 6616821
Showing 41 changed files with 83 additions and 26 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -12,6 +12,7 @@
!**.R
!**.bsh
!**.nim
!**.nims
!**.nimble
!**.md
!**.c
2 changes: 1 addition & 1 deletion .gitmodules
@@ -1,3 +1,3 @@
[submodule "threadpools"]
path = threadpools
path = src/threadpools
url = https://github.com/yglukhov/threadpools.git
7 changes: 4 additions & 3 deletions Makefile
@@ -1,7 +1,8 @@
NIM = nim c

conduit:
cd poaV2 && make
$(NIM) -d:release --threads:on --passL:poaV2/liblpo.a --passL:poaV2/align_score.o conduit.nim
$(NIM) -d:release conduitUtils.nim
cd src/poaV2 && make
mkdir bin/
cd src/ && $(NIM) -d:release --threads:on --passL:poaV2/liblpo.a --passL:poaV2/align_score.o conduit.nim && mv src/conduit bin/
cd src/ && $(NIM) -d:release conduitUtils.nim && mv src/conduitUtils bin/

24 changes: 24 additions & 0 deletions conduit.nimble
@@ -0,0 +1,24 @@
# Package

version = "0.1.0"
author = "Nathan Roach"
description = "De novo transcriptome assembler"
license = "GPLv2"

# Dependencies

requires "hts >= 0.3.1", "nim >= 1.0.0"

srcDir = "src/"

before install:
echo "Building poaV2"
withDir "src/poaV2":
exec "make"

bin = @["conduit", "conduitUtils", "conduit_clustering"]
# skipDirs = @["tests"]
# skipFiles = @["GT04008021.bam"]

# task test, "run the tests":
# exec "nim c --lineDir:on --debuginfo -r tests/all"
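With this package file in place, running nimble install from the repository root first builds the bundled poaV2 C library through the before install hook and then compiles the three binaries listed under bin. Purely as an illustration (not part of this commit), the same build step could also be exposed as a standalone task; task, withDir, and exec are standard NimScript, but the task name here is hypothetical:

# Hypothetical nimble task, mirroring the before-install hook above.
task poa, "build the bundled poaV2 library":
  withDir "src/poaV2":
    exec "make"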
10 changes: 7 additions & 3 deletions conduit.nim → src/conduit.nim
@@ -12,6 +12,8 @@ import hts
import sets
import poaV2/header
import poaV2/poa
import fasta
import fastq

{.experimental.}

@@ -316,7 +318,7 @@ proc runPOAandCollapsePOGraph(intuple : (string,string,string,string,uint16,uint
fasta_file = infilepath
elif format == "fastq":
fasta_file = &"{outdir}{trim}.tmp.fa"
convertFASTQtoFASTA(infilepath,fasta_file)
convertFASTQFiletoFASTAfile(infilepath,fasta_file)
var split_num = 200
var (num_fastas,_) = splitFASTA2(fasta_file,&"{outdir}{trim}.tmp",split_num = split_num)
var total_fastas = num_fastas
@@ -414,7 +416,8 @@ proc runGraphBasedIlluminaCorrection(intuple : (string,string,string,uint64,uint
var records = getFastaRecordsFromTrimmedPOGraph(addr trim_po, representative_paths, read_supports, trim)
var outfile : File
discard open(outfile,this_fasta_filepath,fmWrite)
writeCorrectedReads(records,outfile)
writeFASTArecordsToFile(outfile,records)
# writeCorrectedReads(records,outfile)
outfile.close()
result = sameFileContent(last_fasta_filepath,this_fasta_filepath)
removeFile(last_fasta_filepath)
@@ -444,7 +447,8 @@ proc runLinearBasedIlluminaCorrection(intuple : (string,string,uint64,uint64,uin
corrected.add(FastaRecord(read_id : read.read_id, sequence : getSequenceFromPath(trim_po,trim_po.reads[0].corrected_path)))
var outfile : File
discard open(outfile,this_fasta_filepath,fmWrite)
writeCorrectedReads(corrected,outfile)
writeFASTArecordsToFile(outfile,corrected)
# writeCorrectedReads(corrected,outfile)
outfile.close()


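The fasta and fastq modules imported above are part of the files moved/added by this commit and are not expanded in this diff, so the body of convertFASTQFiletoFASTAfile is not shown here. A minimal sketch of a FASTQ-to-FASTA conversion with that signature, as an assumption rather than the project's actual implementation:

# Hypothetical sketch only; the real fasta/fastq modules are not shown in this diff.
proc convertFASTQFiletoFASTAfile(inpath, outpath: string) =
  ## Read a four-line-per-record FASTQ file and write a FASTA file,
  ## keeping the read id and sequence and dropping the quality information.
  var infile, outfile: File
  if not open(infile, inpath, fmRead): quit("could not open " & inpath)
  if not open(outfile, outpath, fmWrite): quit("could not open " & outpath)
  var lineNum = 0
  for line in infile.lines:
    case lineNum mod 4
    of 0: outfile.writeLine(">" & line[1 .. ^1])  # '@id' header becomes '>id'
    of 1: outfile.writeLine(line)                 # sequence line
    else: discard                                 # '+' separator and quality line
    inc lineNum
  infile.close()
  outfile.close()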
3 changes: 3 additions & 0 deletions src/conduit.nims
@@ -0,0 +1,3 @@
switch("threads", "on")
switch("passL","src/poaV2/liblpo.a")
switch("passL","src/poaV2/align_score.o")
15 changes: 9 additions & 6 deletions conduitUtils.nim → src/conduitUtils.nim
@@ -7,6 +7,8 @@ import tables
import sets
import poGraphUtils
import algorithm
import fasta
import fastq

type
BLASTmatch* = object
@@ -692,9 +694,9 @@ proc compareExactTranslations*(reference_infilepath : string, translation_infile
var r_infile, t_infile : File
discard open(r_infile,reference_infilepath,fmRead)
discard open(t_infile,translation_infilepath,fmRead)
let r_records = poGraphUtils.parseFasta(r_infile)
let r_records = parseFasta(r_infile)
r_infile.close()
let t_records = poGraphUtils.parseFasta(t_infile)
let t_records = parseFasta(t_infile)
t_infile.close()
var r_proteins,t_proteins : HashSet[string]
for record in r_records:
@@ -717,7 +719,7 @@ proc compareBLASTPTranslations*(reference_infilepath : string, blastp_infilepath
var match_set : HashSet[string]
var ref_infile : File
discard open(ref_infile,reference_infilepath,fmRead)
let reference_records = poGraphUtils.parseFasta(ref_infile)
let reference_records = parseFasta(ref_infile)
ref_infile.close()
for record in reference_records:
reference_id_set.incl(record.read_id)
@@ -1171,7 +1173,8 @@ proc getNovelLociFASTA*(infilepath,gffcompare_infilepath,outfilepath : string,fi
if record.read_id in novel_loci:
new_records.add(record)
infile.close
writeCorrectedReads(new_records,outfile)
# writeCorrectedReads(new_records,outfile)
writeFASTArecordsToFile(outfile,new_records)
outfile.close

proc parseOptions() : UtilOptions =
@@ -1392,7 +1395,7 @@ proc main() =
infile.close()
translateTranscripts(records,opt.outfilepath,threshold = int(opt.min_length),stranded = opt.stranded)
else:
let records = poGraphUtils.parseFasta(infile)
let records = parseFasta(infile)
infile.close()
translateTranscripts(records,opt.outfilepath,threshold = int(opt.min_length),stranded = opt.stranded)
of "strandTranscripts":
@@ -1403,7 +1406,7 @@
infile.close()
strandTranscripts(records,opt.outfilepath)
else:
let records = poGraphUtils.parseFasta(infile)
let records = parseFasta(infile)
infile.close()
strandTranscripts(records,opt.outfilepath)
of "bed2gtf":
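writeFASTArecordsToFile, the replacement for writeCorrectedReads in the hunks above, lives in the new fasta module and is likewise not expanded in this diff. A minimal sketch of a writer with that shape; the FastaRecord field names follow their use elsewhere in the diff, while the line-wrapping width is an assumption:

# Hypothetical sketch only; the fasta module's actual implementation is not shown.
type
  FastaRecord = object
    read_id: string     # field names as used in conduit.nim above
    sequence: string

proc writeFASTArecordsToFile(outfile: File, records: seq[FastaRecord]) =
  ## Write each record in FASTA format, wrapping sequences at 80 characters.
  for record in records:
    outfile.writeLine(">" & record.read_id)
    var i = 0
    while i < record.sequence.len:
      outfile.writeLine(record.sequence[i ..< min(i + 80, record.sequence.len)])
      i += 80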
46 changes: 33 additions & 13 deletions conduit_clustering.nim → src/conduit_clustering.nim
@@ -12,6 +12,7 @@ import sets
import genomeKDE
import algorithm
import poGraphUtils
import conduitUtils
import fasta
import fastq

@@ -24,6 +25,7 @@ type
output_dir : string
prefix : string
out_type : string
reference : string

SpliceSiteGraph = object
adjacencies : seq[seq[uint32]]
@@ -91,11 +93,10 @@ proc writeClusterHelp() =
echo " Output reads in FASTA format"

proc summaryFromBamRecord( record : Record,
stranded : bool = false)
:
(char,
(uint64, uint64),
seq[(uint64, uint64)]) =
stranded : bool = false) :
( char,
(uint64, uint64),
seq[(uint64, uint64)] ) =
if record.flag.unmapped:
return
let alignment_start = uint64(record.start)
@@ -197,6 +198,9 @@ proc parseOptions() : ClusteringOptions =
var prefix = &"cluster_"
var prefix_flag = false

var reference_filepath = ""
var reference_filepath_flag = false

var run_flag = true
var help_flag = false
var version_flag = false
@@ -262,6 +266,17 @@
else:
output_format_flag = true
output_format = "fasta"
of "r", "reference":
if not reference_filepath_flag:
reference_filepath_flag = true
if val != "":
reference_filepath = val
else:
last = "reference"
else:
echo "ERROR - two references provided"
help_flag = true
break
of "o", "output-dir":
if not output_dir_flag:
output_dir_flag = true
@@ -304,6 +319,8 @@
output_dir = key
of "prefix":
prefix = key
of "reference":
reference_filepath = key
of "":
if not file_flag:
file = key
@@ -327,7 +344,8 @@
cluster_zero_introns : single_exon,
output_dir : output_dir,
prefix : prefix,
out_type : output_format)
out_type : output_format,
reference : reference_filepath)


proc bfs(ssgraph : ptr SpliceSiteGraph,
@@ -382,6 +400,7 @@ proc getWeightedSpliceJunctionLocations(
result[1].add((ss,weight))
echo result


proc writeFASTAsFromBAM(bam : Bam,
read_id_to_cluster : ptr Table[string,int],
cluster_sizes : ptr CountTable[int],
@@ -406,10 +425,11 @@ proc writeFASTAsFromBAM(bam : Bam,
result += 1
# echo &"Opening cluster file {cluster_id + starting_count}"
open_files[cluster_id] = file
file.writeFASTArecordToFile(record)
# file.writeFASTArecordToFile(record)
file.writeBamRecordToFASTAfile(record)
else:
# echo &"Appending to cluster file {cluster_id + starting_count}"
open_files[cluster_id].writeFASTArecordToFile(record)
open_files[cluster_id].writeBamRecordToFASTAfile(record)
written_reads.inc(cluster_id)
echo written_reads[cluster_id], "\t", cluster_sizes[][cluster_id]
if written_reads[cluster_id] == cluster_sizes[][cluster_id]:
@@ -424,13 +444,13 @@
proc correctBamRecordWithGenome(record : Record, fai : Fai) : FastaRecord =
let summary = summaryFromBamRecord(record, true)
var nt_sequence : string
var start_base = summary[1][0]
var start_base = int(summary[1][0])
var end_base = -1
for donor,acceptor in summary[2]:
end_base = donor
for (donor,acceptor) in summary[2]:
end_base = int(donor)
nt_sequence.add(fai.get(record.chrom,start_base,end_base))
start_base = acceptor
end_base = summary[1][1]
start_base = int(acceptor)
end_base = int(summary[1][1])
nt_sequence.add(fai.get(record.chrom,start_base,end_base))
if summary[0] == '-':
nt_sequence = nt_sequence.revComp
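correctBamRecordWithGenome rebuilds a spliced read sequence from the genome: it walks the (donor, acceptor) splice junctions returned by summaryFromBamRecord, concatenates the genomic sequence of each exon via fai.get, and reverse-complements the result for '-' strand alignments; the added int(...) conversions let the uint64 coordinates be used where signed int positions (and the -1 sentinel) are expected. The stitching pattern in isolation, as a hypothetical sketch in which getSeq stands in for a genome accessor and is an assumption rather than the project's API:

# Hypothetical sketch of the exon-stitching loop used above.
proc stitchExons(getSeq: proc (chrom: string; start, stop: int): string,
                 chrom: string,
                 span: (int, int),
                 junctions: seq[(int, int)]): string =
  var start = span[0]
  for (donor, acceptor) in junctions:
    result.add(getSeq(chrom, start, donor))   # exon up to the splice donor
    start = acceptor                          # resume at the splice acceptor
  result.add(getSeq(chrom, start, span[1]))   # last exon through the alignment end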
1 change: 1 addition & 0 deletions src/conduit_clustering.nims
@@ -0,0 +1 @@
switch("threads", "on")
File renamed without changes. (31 such entries)
