Skip to content

Commit e656a80

Browse files
committed
Merge branch 'master' of github.com:walaj/SeqLib into jh_nov2018
2 parents eb776b9 + 0877128 commit e656a80

20 files changed

+332
-188
lines changed

Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ AUTOMAKE_OPTIONS = foreign
22
SUBDIRS = bwa htslib fermi-lite src
33

44
install:
5-
mkdir -p bin && cp src/libseqlib.a fermi-lite/libfml.a bwa/libbwa.a htslib/libhts.a bin
5+
mkdir -p lib && cp src/libseqlib.a fermi-lite/libfml.a bwa/libbwa.a htslib/libhts.a lib
66

77
seqtools:
88
mkdir -p bin && cd src/seqtools && make && mv seqtools ../../bin

Makefile.in

Lines changed: 14 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
# Makefile.in generated by automake 1.15 from Makefile.am.
1+
# Makefile.in generated by automake 1.16.1 from Makefile.am.
22
# @configure_input@
33

4-
# Copyright (C) 1994-2014 Free Software Foundation, Inc.
4+
# Copyright (C) 1994-2018 Free Software Foundation, Inc.
55

66
# This Makefile.in is free software; the Free Software Foundation
77
# gives unlimited permission to copy and/or distribute it,
@@ -132,7 +132,7 @@ am__recursive_targets = \
132132
$(RECURSIVE_CLEAN_TARGETS) \
133133
$(am__extra_recursive_targets)
134134
AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
135-
cscope distdir dist dist-all distcheck
135+
cscope distdir distdir-am dist dist-all distcheck
136136
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) \
137137
$(LISP)config.h.in
138138
# Read a list of newline-separated strings from the standard input,
@@ -320,8 +320,8 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
320320
echo ' $(SHELL) ./config.status'; \
321321
$(SHELL) ./config.status;; \
322322
*) \
323-
echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe)'; \
324-
cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__depfiles_maybe);; \
323+
echo ' cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles)'; \
324+
cd $(top_builddir) && $(SHELL) ./config.status $@ $(am__maybe_remake_depfiles);; \
325325
esac;
326326

327327
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
@@ -454,7 +454,10 @@ distclean-tags:
454454
-rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
455455
-rm -f cscope.out cscope.in.out cscope.po.out cscope.files
456456

457-
distdir: $(DISTFILES)
457+
distdir: $(BUILT_SOURCES)
458+
$(MAKE) $(AM_MAKEFLAGS) distdir-am
459+
460+
distdir-am: $(DISTFILES)
458461
$(am__remove_distdir)
459462
test -d "$(distdir)" || mkdir "$(distdir)"
460463
@srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
@@ -519,7 +522,7 @@ distdir: $(DISTFILES)
519522
! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
520523
|| chmod -R a+r "$(distdir)"
521524
dist-gzip: distdir
522-
tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
525+
tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz
523526
$(am__post_remove_distdir)
524527

525528
dist-bzip2: distdir
@@ -545,7 +548,7 @@ dist-shar: distdir
545548
@echo WARNING: "Support for shar distribution archives is" \
546549
"deprecated." >&2
547550
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
548-
shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
551+
shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz
549552
$(am__post_remove_distdir)
550553

551554
dist-zip: distdir
@@ -563,7 +566,7 @@ dist dist-all:
563566
distcheck: dist
564567
case '$(DIST_ARCHIVES)' in \
565568
*.tar.gz*) \
566-
GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\
569+
eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\
567570
*.tar.bz2*) \
568571
bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
569572
*.tar.lz*) \
@@ -573,7 +576,7 @@ distcheck: dist
573576
*.tar.Z*) \
574577
uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
575578
*.shar.gz*) \
576-
GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\
579+
eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\
577580
*.zip*) \
578581
unzip $(distdir).zip ;;\
579582
esac
@@ -767,7 +770,7 @@ uninstall-am:
767770

768771

769772
install:
770-
mkdir -p bin && cp src/libseqlib.a fermi-lite/libfml.a bwa/libbwa.a htslib/libhts.a bin
773+
mkdir -p lib && cp src/libseqlib.a fermi-lite/libfml.a bwa/libbwa.a htslib/libhts.a lib
771774

772775
seqtools:
773776
mkdir -p bin && cd src/seqtools && make && mv seqtools ../../bin

SeqLib/BWAWrapper.h

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class BWAWrapper {
3333
*/
3434
BWAWrapper() {
3535
idx = 0;
36+
copy_comment = false;
3637
memopt = mem_opt_init();
3738
memopt->flag |= MEM_F_SOFTCLIP;
3839
}
@@ -65,6 +66,16 @@ class BWAWrapper {
6566
void AlignSequence(const std::string& seq, const std::string& name, BamRecordVector& vec, bool hardclip,
6667
double keep_sec_with_frac_of_primary_score, int max_secondary) const;
6768

69+
/** Perform a BWA-MEM alignment of a single sequence, and store hits in BamReadVector
70+
* @param us UnalignedSequence to be aligned
71+
* @param vec Alignment hits are appended to vec
72+
* @param hardclip Should the output BamRecord objects be hardclipped
73+
* @param keep_sec_with_frac_of_primary_score Set a threshold for whether a secondary alignment should be output
74+
* @param max_secondary Set a hard-limit on the number of secondary hits that will be reported
75+
*/
76+
void AlignSequence(const UnalignedSequence& us, BamRecordVector& vec, bool hardclip,
77+
double keep_sec_with_frac_of_primary_score, int max_secondary) const;
78+
6879
/** Construct a new bwa index for this object.
6980
* @param v vector of references to input (e.g. v = {{"r1", "AT"}};)
7081
*
@@ -157,6 +168,11 @@ class BWAWrapper {
157168

158169
/** Check if the index is empty */
159170
bool IsEmpty() const { return !idx; }
171+
172+
/** Append FASTA/Q comment to SAM output */
173+
void AppendFqComment(){
174+
copy_comment = true;
175+
}
160176

161177
private:
162178

@@ -166,6 +182,9 @@ class BWAWrapper {
166182
// Store the options in memory
167183
mem_opt_t * memopt;
168184

185+
// Store append FASTA/Q comment to SAM output
186+
bool copy_comment;
187+
169188
// hold the full index structure
170189
bwaidx_t* idx;
171190

SeqLib/BamReader.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,11 @@ extern "C" {
1111
int hts_useek(htsFile *file, long uoffset, int where);
1212
}
1313

14-
class BamReader;
1514

1615
namespace SeqLib {
1716

17+
class BamReader;
18+
1819
typedef SeqPointer<hts_idx_t> SharedIndex; ///< Shared pointer to the HTSlib index struct
1920

2021
typedef SeqPointer<htsFile> SharedHTSFile; ///< Shared pointer to the HTSlib file pointer
@@ -30,6 +31,11 @@ namespace SeqLib {
3031

3132
_Bam() : m_region_idx(0), empty(true), mark_for_closure(false) {}
3233

34+
//! Return the header for this BAM
35+
const BamHeader& GetHeader() const {
36+
return m_hdr;
37+
}
38+
3339
~_Bam() {}
3440

3541
std::string GetFileName() const { return m_in; }

SeqLib/BamRecord.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,9 @@ class BamRecord {
286286
/** BamRecord is failed QC */
287287
inline bool QCFailFlag() const { return b ? ((b->core.flag&BAM_FQCFAIL) != 0) : false; }
288288

289+
/** BamRecord is supplementary alignment */
290+
inline bool SupplementaryFlag() const { return b ? ((b->core.flag&BAM_FSUPPLEMENTARY) != 0) : false; }
291+
289292
/** BamRecord is mapped */
290293
inline bool MappedFlag() const { return b ? ((b->core.flag&BAM_FUNMAP) == 0) : false; }
291294

@@ -636,9 +639,9 @@ class BamRecord {
636639
uint32_t* c = bam_get_cigar(b);
637640
int32_t p = 0;
638641
for (size_t i = 0; i < b->core.n_cigar; ++i) {
639-
if ( (bam_cigar_opchr(c[i]) == 'S') || (bam_cigar_opchr(c[i]) == 'H'))
642+
if (bam_cigar_opchr(c[i]) == 'S')
640643
p += bam_cigar_oplen(c[i]);
641-
else // not a clip, so stop counting
644+
else if (bam_cigar_opchr(c[i]) != 'H')
642645
break;
643646
}
644647
return p;
@@ -826,7 +829,7 @@ class BamRecord {
826829
if (b->core.tid < 0)
827830
return std::string();
828831

829-
if (h.isEmpty())
832+
if (!h.isEmpty())
830833
return h.IDtoName(b->core.tid);
831834

832835
// c++98

SeqLib/FastqReader.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ class FastqReader {
1919
public:
2020

2121
/** Construct an empty FASTQ/FASTA reader */
22-
FastqReader() {}
22+
FastqReader() {
23+
seq = NULL;
24+
fp = NULL;
25+
}
2326

2427
/** Construct a reader and open a FASTQ/FASTA reader
2528
* @param file Path to a FASTQ or FASTA file

SeqLib/GenomicRegion.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,9 +65,10 @@ class GenomicRegion {
6565
GenomicRegion(const std::string& reg, const BamHeader& hdr);
6666

6767
/** Return a string representation of just the first base-pair
68+
* @param h BamHeader used as a lookup table for id to string (i.e. chr name)
6869
* e.g. 1:10,000
6970
*/
70-
std::string PointString() const;
71+
std::string PointString(const BamHeader& h) const;
7172

7273
// Randomize the position of this GenomicRegion on the genome
7374
//
@@ -94,9 +95,6 @@ class GenomicRegion {
9495
*/
9596
int32_t DistanceBetweenEnds(const GenomicRegion &gr) const;
9697

97-
/** Returns identical string as would be obtained from << */
98-
std::string ToString() const;
99-
10098
/** Returns true if a.chr < b.chr or a.pos1 < a.pos1 if on same chrome, or if a.pos2 < b.pos2 if same chrom and same pos1 */
10199
bool operator < (const GenomicRegion& b) const;
102100

@@ -129,6 +127,11 @@ class GenomicRegion {
129127
*/
130128
friend std::ostream& operator<<(std::ostream& out, const GenomicRegion& gr);
131129

130+
/** Print the chromosome with the correct chromosome name as store in the header
131+
* @param h BamHeader that stores the lookup between chr id and chr name string
132+
*/
133+
std::string ToString(const BamHeader& h) const;
134+
132135
/** Extract the chromosome name as a string
133136
* @param h BamHeader to serve as sequence dictionary
134137
* @exception throws an out_of_range exception if ref id >= h->n_targets

SeqLib/ReadFilter.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,11 @@ class ReadFilterCollection {
549549
return num;
550550
}
551551

552+
/** Check that the filter collection includes something.
553+
* If not, then give it a universal includer
554+
*/
555+
void CheckHasIncluder();
556+
552557
// Return the a tab-delimited tally of which filters were satisfied.
553558
// Includes the header:
554559
// total_seen_count total_passed_count region region_passed_count rule rule_passed_count

SeqLib/UnalignedSequence.h

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ extern "C" {
1616

1717
#include <cstring>
1818
#include <vector>
19+
#include <iostream>
1920

2021
namespace SeqLib {
2122

@@ -30,27 +31,39 @@ namespace SeqLib {
3031
* @param n Name of the sequence
3132
* @param s Sequence, stored as ACTG or N characters
3233
*/
33-
UnalignedSequence(const std::string& n, const std::string& s) : Name(n), Seq(s), Qual(std::string()), Strand('*') {}
34+
UnalignedSequence(const std::string& n, const std::string& s) : Name(n), Com(std::string()), Seq(s), Qual(std::string()), Strand('*') {}
3435

3536
/** Construct an unaligned sequence with name, sequence and quality score
3637
* @param n Name of the sequence
3738
* @param s Sequence, stored as ACTG or N characters
3839
* @param q Quality string
3940
*/
40-
UnalignedSequence(const std::string& n, const std::string& s, const std::string& q) : Name(n), Seq(s), Qual(q), Strand('*') {}
41+
UnalignedSequence(const std::string& n, const std::string& s, const std::string& q) : Name(n), Com(std::string()), Seq(s), Qual(q), Strand('*') {}
4142

4243
/** Construct an unaligned sequence with name, sequence, quality score and strand
4344
* @param n Name of the sequence
4445
* @param s Sequence, stored as ACTG or N characters
4546
* @param q Quality string
4647
* @param t Strand of the sequence, one of '*', '+', '-'
4748
*/
48-
UnalignedSequence(const std::string& n, const std::string& s, const std::string& q, char t) : Name(n), Seq(s), Qual(q), Strand(t) {}
49+
UnalignedSequence(const std::string& n, const std::string& s, const std::string& q, char t) : Name(n), Com(std::string()), Seq(s), Qual(q), Strand(t) {}
4950

5051
std::string Name; ///< Name of the contig
52+
std::string Com; ///< Comment of the contig
5153
std::string Seq; ///< Sequence of the contig (upper-case ACTGN)
5254
std::string Qual; ///< Quality scores
5355
char Strand; ///< Strand of the sequence. Default is '*'
56+
57+
/** Output an unaligned sequence to ostream
58+
* @param os ostream
59+
* @param us UnalignedSequence
60+
*/
61+
friend std::ostream& operator<<(std::ostream& os, const SeqLib::UnalignedSequence& us){
62+
os << "@" << us.Name << " " << us.Com << "\n";
63+
os << us.Seq << "\n+\n";
64+
os << us.Qual << "\n";
65+
return os;
66+
}
5467
};
5568

5669
typedef std::vector<UnalignedSequence> UnalignedSequenceVector; ///< A collection of unaligned sequences

autogen.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ set -ex
44
aclocal
55
autoconf
66
autoheader
7-
automake -a --add-missing
7+
automake --add-missing --copy

0 commit comments

Comments
 (0)