Skip to content

Commit

Permalink
Squashed 'lib/mmseqs/' changes from f349118312..ce7bf53b82
Browse files Browse the repository at this point in the history
ce7bf53b82 Point Kalamari3.7v to a fixed commit soedinglab/MMseqs2#531
fcf5260080 Remove a level of indirection to access compatible index version
922e2691e0 Fix failing utility tests
74c3aa65e5 Fix typo (violoations -> violations) (#526)
7281baf933 Add --comp-bias-corr-scale
d89fcecf99 Write serialized index in appenddbtoindex
79ea1ee301 Fix new IndexReader USER_SELECT trying to read header databases as fallback
a506d677f2 Allow subprojects to build their own precomputed indices
75af0c82ed Add appenddbtoindex to argument a precomputed index in sub-projects
4f046dd197 Add mask prob to mask sequence
38cf3f1085 Fix TestIndexTable
b768f48f0b Add --mask-prob parameter
bfc6f85bbb removed error message for wrapped scoring, should work with all rescore modes
edb8223d1e Fix pairaln
6e7ed70055 Merge branch 'master' of https://github.com/soedinglab/mmseqs2
e19df7ce10 Rework pairing to support more than two sequences
9fded60acb Add environment variable MMSEQS_IGNORE_INDEX to ignore an existing precomputed target index
efacc6904c Cushioning the overestimated number of diagonals in case of many successive hits on one diagonal
5fc318b6d8 Add convertalis --format-mode 4 to print blast-tab headers
80fcaddefb Disable profile gap scores in msa2profile temporarily
9cc89aa594 Fix huge memory allocations introduced in 49c2b70
a8c30da56d result2msa correctly prints X residues
482dedc657 Explicitly set threads in Cirrus
75e9bfaa29 Update tectonic in azure to fix error in userguide building
16830a5247 Fix number of CPUs used in cirrus
aab640d279 Fix gap pseudocount mode again
716fb6217d Turn --k-score into MultiParam so it works correctly in iterative-profile search
56816b3964 Resfinder download should not use tar wildcards, broken in busybox #494
e85ceb9d14 Change the url for UniRef* from ftp to https in databases downloader (#496)
49c2b70b47 Fix mem. issue
09e261bf19 Avoid subtracting from getMaxSeqLen
4b77690ea1 Move maxSeqLen logic to getMaxSeqLen() to avoid index issues
d87369739b Fix max length in DBReader Allocate CSProfile only when needed
42bf6438fe Rework download database
5afd33c37f Make "databases" usable in sub-projects
f65187996c Update regression
f3f5b13350 Update k-score sensitivity fitting for no-cntxt profile searches
3e92abf7d9 Add db-load-mode support to pairaln
5e245d17b9 copy dbtype and clear map
4a3bb34080 Merge branch 'master' of https://github.com/milot-mirdita/mmseqs2
9a0df0d25a Add pairaln
fa44760ec6 Fix recent forgotten else in getKmerThreshold
45b2b52175 Revert "Try increasing the k-mer thresholds again for 5/6-mers"
be11943326 Fix prefilter not correctly masking extended dbtype for comparision
e3ce4605e2 Fix memory leak in MappingReader uncovered by ASan
06bdc5e796 Fix missing cassert header in tsv2exprofiledb
8521fb45c3 Remove useless calls to opendir/closedir in FileUtil
885b46999b Add workflow to create expandable profile (profile-profile) db from a bunch of TSV files
ad05844f36 Add missing pseudocount check in indexdb
e33c32aae7 Fit new values for prefilter
7950368f70 Fix another broken test
b456cf51dd Fix unused variables in lca
003cd244b5 Merge remote-tracking branch 'main/master'
6a8f586bed Add extended dbtype to check for context specific pseudocounts, so that the correctly fitted kmer thresholds can be used
92a19497b3 Fix uninitialized warning in addtaxonomy
2e75435ec7 Fix createbintaxonomy mapping dump size written
178eacff4e impl. contextPseudoCnts getKmerThreshold, values not fitted yet
35c67c87c1 Change pos. spec. gap costs to templates
9defdf8910 fixed bug for uneven number of repeated kmers
0c26a1077d replaced global with end_to_end in rescore mode variable
9064061dde fixed size_t parameter handling
3fa46fe3b9 Merge branch 'master' of https://github.com/soedinglab/mmseqs2
763fa9ffb7 Change compress loop to omp static to keep order
49710b7f41 Fix sub. mat asan issue
d0a00d6a30 Update Sub. Mat. logic for aa2num mapping
ccf5555980 Fix test
e4aae9271f Make taxonomy mapping mmap'able for instant read-in
c66fd1b10e Fix syntax error in filterresult
8762359677 Fix issues with include identities in filterresult
91617c4b78 Add includeIdentity to filterresult
fe16da3957 Stay compatible with previous short A3M header output format
ce5b241800 Fix wrong assumption about header databases IDs with new index database scheme in result2msa
a54df87419 Remove E-value threshold in filterresults
5647a56a8c Allow --diff 0
d565619151 Add MSA output mode for A3M+aln info
85ce847221 Expand can filter in each target cluster before expanding
ae4c7ab1b5 Merge branch 'master' of https://github.com/soedinglab/MMseqs2
38ab523ae7 Merge branch 'master' of https://github.com/soedinglab/mmseqs2
5e0d11f256 Extend MSA filtering for bucketed filtering within qid buckets
c6d8ae0c05 Add filter min enable
25cb16fff4 Enable result2profile/filterresult to read new expand alignment index
37225004ad Don't mask consensus sequences in profiles
b2a3402022 Ignore cacode warnings
c3e90f4197 Allow indexing of profile-profile db
66fa3c76d6 Update regression to remove result2pp from expand check
87fed2e60a Merge remote-tracking branch 'main/master'
5b75b8421a Try increasing the k-mer thresholds again for 5/6-mers
01492c9581 Revert "Make sure QueryMatcher::radixSortByScoreSize cant corrupt memory"
86152a2fb6 Remove useless calls to std::map::operator[]
d4dd06d27a Fix iterative profile search restartable again
91b617067f Make sure QueryMatcher::radixSortByScoreSize cant corrupt memory
af3170952e Save a bunch of work when sequences are not needed in expand*
be5a1da484 Replace many aligned allocation in MultipleAlignment with single allocation
7469d5999a Fix unused warning
942a012a5a Move MultiParam::format out of header to avoid compilation warning
d214805827 Fix unused parameter warning
40ba03f461 Disable warnings from nedmalloc (external dependency)
c811a511a0 Fix tests after profile-profile refactoring
7a8ee48507 Try to fix profile-profile alignment for SSE
68862ed27c Add missing simd.h functions for SSE
a09de7eb8e Fix compile errors
807d97a9fa Merge remote-tracking branch 'main/master' into ppmerge
4578f8ba34 Temporary change to slicesearch to speed things up
3a51b4456c Add support to support position-specific gap penalties in profile-profile alignment in iterative search.
3d40f1055b Fixes for gap penalties merge
2718ca7524 First attempt to merge prof-prof and gap-penalties
93f90b045f Fixes to last merge
b78111882d Merge branch 'master' into main-master
22a7bfa243 Add iterativepp workflow
2a4a2dc5ee Add correlation score parameter to align
f9d2ae30e9 Add support for new Multiparameter type
cbc1b4898c Refactor pseudocounts
1e58454a94 Restore K4000.crf from history
d9f2041e99 Merge branch 'master' of https://github.com/soedinglab/mmseqs2
971f9d9090 Turn profiles from lin-space to scores, add average profile-profile code
3af62f0651 Fix banded_sw
725d9f6349 Modified Profile-Profile alignment implementation with templates.
60d5be1752 Add missing var to profile
12b78e3f4f Merge branch 'master' of https://github.com/haydenji0731/MMseqs
2aaac47a64 First running version of double max profile/profile
db1c38b1c0 Made changes to SSW class for Profile2Profile Alignment
b001dfb2af Made modifications for Profile-Profile alignment. Changes belong to SSW, Alignment, Matcher. Right before integrating lin space vector cost calculation for H value.
521c0d257b Made modifications to ssw algorithm implementation.

git-subtree-dir: lib/mmseqs
git-subtree-split: ce7bf53b8241f7ced20f5a75bab052da98e5ca79
  • Loading branch information
elileka committed Feb 12, 2022
1 parent 0e8c2a0 commit b8a3458
Show file tree
Hide file tree
Showing 143 changed files with 85,129 additions and 3,613 deletions.
3 changes: 2 additions & 1 deletion .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ task:
name: "Old compilers"
container:
image: debian:jessie-slim
cpu: 4
memory: 8G
matrix:
- name: Clang-4
Expand All @@ -32,5 +33,5 @@ task:
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Release -DHAVE_TESTS=1 -DENABLE_WERROR=1 -DHAVE_SSE4_1=1 -DREQUIRE_OPENMP=0 ..
make -j $(nproc --all)
test_script: ./util/regression/run_regression.sh ./build/src/mmseqs SCRATCH SEARCH
test_script: MMSEQS_NUM_THREADS=4 ./util/regression/run_regression.sh ./build/src/mmseqs SCRATCH SEARCH

2 changes: 1 addition & 1 deletion azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
path: $(Pipeline.Workspace)/tectonic-cache/
displayName: Cache Tectonic
- script: |
wget -qO- https://github.com/tectonic-typesetting/tectonic/releases/download/tectonic%400.4.1/tectonic-0.4.1-x86_64-unknown-linux-gnu.tar.gz | tar xzvf - tectonic
wget -qO- https://github.com/tectonic-typesetting/tectonic/releases/download/tectonic%400.8.0/tectonic-0.8.0-x86_64-unknown-linux-gnu.tar.gz | tar xzvf - tectonic
wget -qO- https://github.com/jgm/pandoc/releases/download/2.11.3.2/pandoc-2.11.3.2-linux-amd64.tar.gz | tar --strip-components=2 -xzvf - pandoc-2.11.3.2/bin/pandoc
sudo mv -f pandoc tectonic /usr/local/bin
displayName: Install Dependencies
Expand Down
2 changes: 1 addition & 1 deletion data/resources/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,6 @@ set(GENERATED_RESOURCES
resources/cs219.lib
resources/krona_prelude.html
resources/result_viz_prelude.html.zst

resources/K4000.crf
PARENT_SCOPE
)
80,003 changes: 80,003 additions & 0 deletions data/resources/K4000.crf

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions data/workflow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,7 @@ set(GENERATED_WORKFLOWS
workflow/linsearch.sh
workflow/databases.sh
workflow/nucleotide_clustering.sh
workflow/iterativepp.sh
workflow/tsv2exprofiledb.sh
PARENT_SCOPE
)
29 changes: 17 additions & 12 deletions data/workflow/blastpgp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ STEP=0
[ -z "$NUM_IT" ] && NUM_IT=3;
while [ $STEP -lt $NUM_IT ]; do
# call prefilter module
if notExists "$TMP_PATH/pref_$STEP.dbtype"; then
if notExists "$TMP_PATH/pref_tmp_${STEP}.done"; then
PARAM="PREFILTER_PAR_$STEP"
eval TMP="\$$PARAM"
if [ $STEP -eq 0 ]; then
Expand All @@ -39,24 +39,22 @@ while [ $STEP -lt $NUM_IT ]; do
$RUNNER "$MMSEQS" prefilter "$QUERYDB" "$2" "$TMP_PATH/pref_tmp_$STEP" ${TMP} \
|| fail "Prefilter died"
fi
touch "$TMP_PATH/pref_tmp_${STEP}.done"
fi

if [ $STEP -ge 1 ]; then
if notExists "$TMP_PATH/pref_$STEP.dbtype"; then
if notExists "$TMP_PATH/pref_$STEP.done"; then
STEPONE=$((STEP-1))
# shellcheck disable=SC2086
"$MMSEQS" subtractdbs "$TMP_PATH/pref_tmp_$STEP" "$TMP_PATH/aln_$STEPONE" "$TMP_PATH/pref_$STEP" $SUBSTRACT_PAR \
|| fail "Substract died"

|| fail "Substract died"
"$MMSEQS" rmdb "$TMP_PATH/pref_tmp_$STEP"

#mv -f "$TMP_PATH/pref_next_$STEP" "$TMP_PATH/pref_$STEP"
#mv -f "$TMP_PATH/pref_next_$STEP.index" "$TMP_PATH/pref_$STEP.index"
fi
touch "$TMP_PATH/pref_$STEP.done"
fi

# call alignment module
if notExists "$TMP_PATH/aln_tmp_$STEP.dbtype"; then
if notExists "$TMP_PATH/aln_tmp_$STEP.done"; then
PARAM="ALIGNMENT_PAR_$STEP"
eval TMP="\$$PARAM"

Expand All @@ -69,21 +67,22 @@ while [ $STEP -lt $NUM_IT ]; do
$RUNNER "$MMSEQS" "${ALIGN_MODULE}" "$QUERYDB" "$2" "$TMP_PATH/pref_$STEP" "$TMP_PATH/aln_tmp_$STEP" ${TMP} \
|| fail "Alignment died"
fi
touch "$TMP_PATH/aln_tmp_$STEP.done"
fi

if [ $STEP -gt 0 ]; then
if notExists "$TMP_PATH/aln_$STEP.dbtype"; then
if notExists "$TMP_PATH/aln_$STEP.done"; then
STEPONE=$((STEP-1))

if [ $STEP -ne $((NUM_IT - 1)) ]; then
"$MMSEQS" mergedbs "$QUERYDB" "$TMP_PATH/aln_$STEP" "$TMP_PATH/aln_$STEPONE" "$TMP_PATH/aln_tmp_$STEP" \
|| fail "Alignment died"
else
"$MMSEQS" mergedbs "$QUERYDB" "$3" "$TMP_PATH/aln_$STEPONE" "$TMP_PATH/aln_tmp_$STEP" \
|| fail "Alignment died"
|| fail "Alignment died"
fi
"$MMSEQS" rmdb "$TMP_PATH/aln_$STEPONE"
"$MMSEQS" rmdb "$TMP_PATH/aln_tmp_$STEP"
touch "$TMP_PATH/aln_$STEP.done"
fi
fi

Expand All @@ -94,7 +93,7 @@ while [ $STEP -lt $NUM_IT ]; do
eval TMP="\$$PARAM"
# shellcheck disable=SC2086
$RUNNER "$MMSEQS" result2profile "$QUERYDB" "$2" "$TMP_PATH/aln_$STEP" "$TMP_PATH/profile_$STEP" ${TMP} \
|| fail "Create profile died"
|| fail "Create profile died"
fi
fi
QUERYDB="$TMP_PATH/profile_$STEP"
Expand All @@ -104,12 +103,18 @@ done
if [ -n "$REMOVE_TMP" ]; then
STEP=0
while [ "$STEP" -lt "$NUM_IT" ]; do
if [ $STEP -gt 0 ]; then
rm -f -- "$TMP_PATH/aln_$STEP.done" "$TMP_PATH/pref_$STEP.done"
fi
rm -f -- "$TMP_PATH/aln_tmp_$STEP.done" "$TMP_PATH/pref_tmp_${STEP}.done"
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/pref_$STEP" ${VERBOSITY}
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/aln_$STEP" ${VERBOSITY}
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/profile_$STEP" ${VERBOSITY}
# shellcheck disable=SC2086
"$MMSEQS" rmdb "${TMP_PATH}/profile_${STEP}_h" ${VERBOSITY}
STEP=$((STEP+1))
done
rm -f "$TMP_PATH/blastpgp.sh"
Expand Down
21 changes: 12 additions & 9 deletions data/workflow/databases.sh
Original file line number Diff line number Diff line change
Expand Up @@ -66,24 +66,24 @@ INPUT_TYPE=""
case "${SELECTION}" in
"UniRef100")
if notExists "${TMP_PATH}/uniref100.fasta.gz"; then
downloadFile "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.release_note" "${TMP_PATH}/version"
downloadFile "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.fasta.gz" "${TMP_PATH}/uniref100.fasta.gz"
downloadFile "https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.release_note" "${TMP_PATH}/version"
downloadFile "https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref100/uniref100.fasta.gz" "${TMP_PATH}/uniref100.fasta.gz"
fi
push_back "${TMP_PATH}/uniref100.fasta.gz"
INPUT_TYPE="FASTA_LIST"
;;
"UniRef90")
if notExists "${TMP_PATH}/uniref90.fasta.gz"; then
downloadFile "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.release_note" "${TMP_PATH}/version"
downloadFile "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz" "${TMP_PATH}/uniref90.fasta.gz"
downloadFile "https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.release_note" "${TMP_PATH}/version"
downloadFile "https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz" "${TMP_PATH}/uniref90.fasta.gz"
fi
push_back "${TMP_PATH}/uniref90.fasta.gz"
INPUT_TYPE="FASTA_LIST"
;;
"UniRef50")
if notExists "${TMP_PATH}/uniref50.fasta.gz"; then
downloadFile "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.release_note" "${TMP_PATH}/version"
downloadFile "ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz" "${TMP_PATH}/uniref50.fasta.gz"
downloadFile "https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.release_note" "${TMP_PATH}/version"
downloadFile "https://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref50/uniref50.fasta.gz" "${TMP_PATH}/uniref50.fasta.gz"
fi
push_back "${TMP_PATH}/uniref50.fasta.gz"
INPUT_TYPE="FASTA_LIST"
Expand Down Expand Up @@ -223,8 +223,11 @@ case "${SELECTION}" in
if notExists "${TMP_PATH}/download.done"; then
downloadFile "https://api.bitbucket.org/2.0/repositories/genomicepidemiology/resfinder_db/commit/master?fields=hash,date" "${TMP_PATH}/version"
downloadFile "https://bitbucket.org/genomicepidemiology/resfinder_db/get/master.tar.gz" "${TMP_PATH}/master.tar.gz"
tar -C "${TMP_PATH}" --strip-components=1 -xzvf "${TMP_PATH}/master.tar.gz" "*.fsa"
rm -f "${TMP_PATH}/master.tar.gz"
# avoid tar wildcard extraction as it's not available in busybox tar (windows, biocontainer)
mkdir -p "${TMP_PATH}/fsa"
tar -C "${TMP_PATH}/fsa" --strip-components=1 -xzvf "${TMP_PATH}/master.tar.gz"
mv -f -- "${TMP_PATH}/fsa/"*.fsa "${TMP_PATH}"
rm -rf -- "${TMP_PATH}/master.tar.gz" "${TMP_PATH}/fsa"
touch "${TMP_PATH}/download.done"
fi
INPUT_TYPE="FSA"
Expand Down Expand Up @@ -254,7 +257,7 @@ case "${SELECTION}" in
"Kalamari")
if notExists "${TMP_PATH}/kalamari.tsv"; then
printf "3.7 %s\n" "$(date "+%s")" > "${TMP_PATH}/version"
downloadFile "https://raw.githubusercontent.com/lskatz/Kalamari/master/src/Kalamari_v3.7.tsv" "${TMP_PATH}/kalamari.tsv"
downloadFile "https://raw.githubusercontent.com/lskatz/Kalamari/18d71da740546ba4a5117682e1ae2a037379afe0/src/Kalamari_v3.7.tsv" "${TMP_PATH}/kalamari.tsv"
fi
ACCESSIONS=""
# shellcheck disable=SC2034
Expand Down
95 changes: 95 additions & 0 deletions data/workflow/iterativepp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/bin/sh -e
# Iterative profile-profile search workflow script (iterativepp)
# Report a fatal problem and stop the workflow.
#   $1 - human-readable description of what went wrong
# Prints "Error: <message>" on stdout and exits with status 1.
fail() {
    echo "Error: ${1}"
    exit 1
}

# Succeeds (status 0) when `test -f` does not find a regular file at $1,
# and fails (status 1) when it does.
#   $1 - path to probe
notExists() {
    test ! -f "$1"
}

# Pre-flight checks: every failed check prints a message and exits with status 1.

# The MMseqs binary is located through the MMSEQS environment variable.
if [ -z "$MMSEQS" ]; then
    echo "Please set the environment variable \$MMSEQS to your MMSEQS binary."
    exit 1
fi

# At least four positional arguments are needed.
if [ "$#" -lt 4 ]; then
    echo "Please provide <queryDB> <targetDB> <outDB> <tmp>"
    exit 1
fi

# The query and target databases must already exist ...
if [ ! -f "$1.dbtype" ]; then
    echo "$1.dbtype not found!"
    exit 1
fi
if [ ! -f "$2.dbtype" ]; then
    echo "$2.dbtype not found!"
    exit 1
fi
# TODO: ask for $2_aln and that this contains backtrace
#[ ! -f "$2_aln" ] && echo "$2_aln not found!" && exit 1;

# ... while the output database must not.
if [ -f "$3.dbtype" ]; then
    echo "$3.dbtype exists already!"
    exit 1
fi

# A missing tmp directory is reported and then created rather than treated as fatal.
if [ ! -d "$4" ]; then
    echo "tmp directory $4 not found!"
    mkdir -p "$4"
fi

# Names used by the rest of the workflow. The alignment and sequence
# databases are addressed by appending _aln / _seq to the target path.
STEP=0
TMP_PATH="$4"
QUERYDB="$1"
TARGETDB="$2"
SEQDB="${2}_seq"
ALNDB="${2}_aln"

# Main iteration loop.
#
# Reads:  MMSEQS, RUNNER, NUM_IT, SEARCH_PAR, CONSENSUS_PAR, SUBTRACT_PAR,
#         PREFILTER_PAR_<n>, ALIGNMENT_PAR_<n>, EXPANDPROFILE_PAR, EXPANDALN_PAR,
#         plus QUERYDB / TARGETDB / SEQDB / ALNDB / TMP_PATH / STEP set earlier.
# Writes: per-iteration databases under $TMP_PATH and the final result in "$3".
#
# Without a default, an unset NUM_IT makes the loop test below fail with a
# "not a number" error, which silently skips every iteration.
if [ -z "$NUM_IT" ]; then
    NUM_IT=3
fi

while [ "$STEP" -lt "$NUM_IT" ]; do
    # First iteration: run a full search, then build a consensus database
    # from the target and use it as the prefilter target from here on.
    # (The failure message still says "Slicesearch" although `search` is called.)
    if [ "$STEP" -eq 0 ]; then
        # shellcheck disable=SC2086
        "$MMSEQS" search "$QUERYDB" "$TARGETDB" "$TMP_PATH/aln_$STEP" "$TMP_PATH" ${SEARCH_PAR} \
            || fail "Slicesearch died"
        # shellcheck disable=SC2086
        "$MMSEQS" profile2consensus "$TARGETDB" "$2_consensus" ${CONSENSUS_PAR} \
            || fail "Profile2Consensus died"
        TARGETDB="$2_consensus"
    fi

    # Later iterations: prefilter, subtract the previous alignment result,
    # align, and merge with the previous alignment result.
    if [ "$STEP" -gt 0 ]; then
        STEPPREV=$((STEP-1))

        # call prefilter module; per-iteration parameters are looked up by name
        PARAM="PREFILTER_PAR_$STEP"
        eval TMP="\$$PARAM"
        # shellcheck disable=SC2086
        $RUNNER "$MMSEQS" prefilter "$QUERYDB" "$TARGETDB" "$TMP_PATH/pref_tmp_$STEP" ${TMP} \
            || fail "Prefilter died"

        # shellcheck disable=SC2086
        "$MMSEQS" subtractdbs "$TMP_PATH/pref_tmp_$STEP" "$TMP_PATH/aln_$STEPPREV" "$TMP_PATH/pref_$STEP" $SUBTRACT_PAR \
            || fail "Subtract died"
        "$MMSEQS" rmdb "$TMP_PATH/pref_tmp_$STEP"

        # call alignment module; note it is run against the original target
        # "$2", not the consensus database used for the prefilter
        PARAM="ALIGNMENT_PAR_$STEP"
        eval TMP="\$$PARAM"
        # shellcheck disable=SC2086
        $RUNNER "$MMSEQS" align "$QUERYDB" "$2" "$TMP_PATH/pref_$STEP" "$TMP_PATH/aln_tmp_$STEP" ${TMP} \
            || fail "Alignment died"

        # merge alignment dbs
        "$MMSEQS" mergedbs "$QUERYDB" "$TMP_PATH/aln_$STEP" "$TMP_PATH/aln_$STEPPREV" "$TMP_PATH/aln_tmp_$STEP" \
            || fail "Mergedbs died"
        #"$MMSEQS" rmdb "$TMP_PATH/aln_$STEPPREV"
        #"$MMSEQS" rmdb "$TMP_PATH/aln_tmp_$STEP"
    fi

    # expand alignment dbs: intermediate iterations produce the next query
    # profile, the last iteration writes the final result to "$3"
    if [ "$STEP" -ne $((NUM_IT - 1)) ]; then
        # shellcheck disable=SC2086
        "$MMSEQS" expand2profile "$QUERYDB" "$SEQDB" "$TMP_PATH/aln_$STEP" "$ALNDB" "$TMP_PATH/profile_$STEP" $EXPANDPROFILE_PAR \
            || fail 'Expand2Profile died'
    else
        # PARAM="EXPANDALN_PAR"
        # shellcheck disable=SC2086
        "$MMSEQS" expandaln "$QUERYDB" "$SEQDB" "$TMP_PATH/aln_$STEP" "$ALNDB" "$3" $EXPANDALN_PAR \
            || fail "Expandaln died"
    fi

    # The profile just written becomes the query of the next iteration.
    QUERYDB="$TMP_PATH/profile_$STEP"
    STEP=$((STEP+1))
done

# Optional clean-up, enabled by a non-empty REMOVE_TMP: for every iteration
# call `rmdb` on the pref_, aln_ and profile_ databases in the tmp directory,
# then delete $TMP_PATH/iterativepp.sh.
if [ -n "$REMOVE_TMP" ]; then
    STEP=0
    while [ "$STEP" -lt "$NUM_IT" ]; do
        for DBPREFIX in pref aln profile; do
            # shellcheck disable=SC2086
            "$MMSEQS" rmdb "${TMP_PATH}/${DBPREFIX}_$STEP" ${VERBOSITY}
        done
        STEP=$((STEP+1))
    done
    rm -f "$TMP_PATH/iterativepp.sh"
fi
1 change: 1 addition & 0 deletions data/workflow/searchslicedtargetprofile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ fi

TOTAL_NUM_PROFILES=$(wc -l < "${PROFILEDB}.index")
NUM_SEQS_THAT_SATURATE="$(wc -l < "${INPUT}.index")"
#NUM_SEQS_THAT_SATURATE="$((NUM_SEQS_THAT_SATURATE/10))"
FIRST_INDEX_LINE=1
NUM_PROFS_IN_STEP=1
NUM_PREF_RESULTS_IN_ALL_PREV_STEPS=0
Expand Down
47 changes: 47 additions & 0 deletions data/workflow/tsv2exprofiledb.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/sh -e
# shellcheck disable=SC2086
# The MMseqs binary is located through the MMSEQS environment variable.
if [ -z "$MMSEQS" ]; then
    echo "Please set the environment variable \$MMSEQS to your MMSEQS binary."
    exit 1
fi

# Exactly two positional arguments are accepted.
if [ "$#" -ne 2 ]; then
    echo "Please provide <inputTSV> <outDB>"
    exit 1
fi

# Succeeds (status 0) when `test -f` does not find a regular file at $1,
# and fails (status 1) when it does.
#   $1 - path to probe
notExists() {
    test ! -f "$1"
}

# IN is the common prefix of the input TSV files, OUT the prefix of the
# databases to create.
IN="$1"
OUT="$2"

# All four input files must be present: <IN>.tsv, <IN>_h.tsv, <IN>_seq.tsv
# and <IN>_aln.tsv. The first missing one is reported and aborts the script.
for SUFFIX in "" "_h" "_seq" "_aln"; do
    if [ ! -f "${IN}${SUFFIX}.tsv" ]; then
        echo "${IN}${SUFFIX}.tsv not found!"
        exit 1
    fi
done

# The output prefix itself must not be a directory. The test is on ${OUT},
# which is the path the message names and the path the databases are written to.
if [ -d "${OUT}" ]; then
    echo "${OUT} is a directory!"
    exit 1
fi

# Convert each TSV file into a database. Every step is skipped when its
# .dbtype file already exists, so an interrupted run can be resumed.
#
# NOTE(review): the environment variable passed to `compress` is spelled
# MMSEQS_FORCE_MERGE here; the earlier spelling MMSEQS_FOCE_MERGE looks like a
# typo that would leave the variable unread - confirm the name against the
# MMseqs2 sources.

# Header database, converted directly.
if notExists "${OUT}_h.dbtype"; then
    "$MMSEQS" tsv2db "${IN}_h.tsv" "${OUT}_h" --output-dbtype 12 ${VERBOSITY}
fi

# Main database: convert to a temporary database, compress it into place,
# then remove the temporary.
if notExists "${OUT}.dbtype"; then
    "$MMSEQS" tsv2db "${IN}.tsv" "${OUT}_tmp" --output-dbtype 0 ${VERBOSITY}
    MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_tmp" "${OUT}" ${VERBOSITY}
    "$MMSEQS" rmdb "${OUT}_tmp" ${VERBOSITY}
fi

# Sequence database (<OUT>_seq), same convert / compress / remove sequence.
if notExists "${OUT}_seq.dbtype"; then
    "$MMSEQS" tsv2db "${IN}_seq.tsv" "${OUT}_seq_tmp" --output-dbtype 0 ${VERBOSITY}
    MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_seq_tmp" "${OUT}_seq" ${VERBOSITY}
    "$MMSEQS" rmdb "${OUT}_seq_tmp" ${VERBOSITY}
fi

# Alignment database (<OUT>_aln), same convert / compress / remove sequence.
if notExists "${OUT}_aln.dbtype"; then
    "$MMSEQS" tsv2db "${IN}_aln.tsv" "${OUT}_aln_tmp" --output-dbtype 5 ${VERBOSITY}
    MMSEQS_FORCE_MERGE=1 "$MMSEQS" compress "${OUT}_aln_tmp" "${OUT}_aln" ${VERBOSITY}
    "$MMSEQS" rmdb "${OUT}_aln_tmp" ${VERBOSITY}
fi

# The sequence database reuses the header database through an alias.
if notExists "${OUT}_seq_h.dbtype"; then
    "$MMSEQS" aliasdb "${OUT}_h" "${OUT}_seq_h" ${VERBOSITY}
fi

# Remove a leftover <OUT>.sh file if one exists.
# NOTE(review): presumably a copy of this workflow script placed next to the
# output - confirm against the caller.
if test -e "${OUT}.sh"; then
    rm -f -- "${OUT}.sh"
fi
2 changes: 1 addition & 1 deletion lib/cacode/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ add_library(cacode OBJECT
nrutil.cpp
nrutil.h
)
set_target_properties(cacode PROPERTIES COMPILE_FLAGS "${MMSEQS_CXX_FLAGS}" LINK_FLAGS "${MMSEQS_CXX_FLAGS}")
set_target_properties(cacode PROPERTIES COMPILE_FLAGS "${MMSEQS_CXX_FLAGS} -w" LINK_FLAGS "${MMSEQS_CXX_FLAGS} -w")
4 changes: 2 additions & 2 deletions lib/nedmalloc/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
add_library(nedmalloc nedmalloc.c)
set_target_properties(nedmalloc
PROPERTIES
COMPILE_FLAGS "-DREPLACE_SYSTEM_ALLOCATOR -DWIN32 -DNO_MALLINFO=1 ${MMSEQS_CXX_FLAGS}"
LINK_FLAGS "-DREPLACE_SYSTEM_ALLOCATOR -DWIN32 -DNO_MALLINFO=1 ${MMSEQS_CXX_FLAGS}")
COMPILE_FLAGS "-DREPLACE_SYSTEM_ALLOCATOR -DWIN32 -DNO_MALLINFO=1 ${MMSEQS_CXX_FLAGS} -w"
LINK_FLAGS "-DREPLACE_SYSTEM_ALLOCATOR -DWIN32 -DNO_MALLINFO=1 ${MMSEQS_CXX_FLAGS} -w")
Loading

0 comments on commit b8a3458

Please sign in to comment.