Skip to content

Commit

Permalink
distTree_inc_new_log.sh
Browse files Browse the repository at this point in the history
  • Loading branch information
vbrover committed Jan 29, 2021
1 parent 1053452 commit 507b73f
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 62 deletions.
85 changes: 24 additions & 61 deletions phylogeny/distTree_inc_new.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,11 @@ fi

VER=$(( $VER_OLD + 1 ))
echo $VER > $INC/version
echo "version: $VER"
section "version: $VER"


echo "new/ -> search/ ..."

section "new/ -> search/ ..."
echo "# Objects: $OBJS"

echo "To add at this step: $ADD"

$THIS/distTree_inc_new_list.sh $INC > $INC/new.list
Expand All @@ -75,38 +73,32 @@ rm $INC/new.list

$THIS/../trav -threads 15 $INC/search.list "mkdir $INC/search/%f"
$THIS/distTree_inc_new_cmd.sh $INC "rm" $INC/search.list
rm $INC/search.list


echo ""
echo "search/ -> leaf, dissim ..."
section "search/ -> leaf, dissim ..."

N=`ls $INC/search/ | wc -l`
if [ $N -gt 0 ]; then
rm -rf $INC/log/
mkdir $INC/log
$THIS/../trav $INC/search "touch $INC/log/%f"
SEARCH_GRID_MIN=$(( $GRID_MIN / 100 )) # PAR
GRID=0
if [ $N -lt $SEARCH_GRID_MIN ]; then
$THIS/../trav -step 1 $INC/search "$THIS/distTree_inc_search_init.sh $INC %f"
else
GRID=1
$THIS/../grid_wait.sh 1
UL1=""
if [ -e $INC/request_closest_sql ]; then
UL1=",ul1=30"
fi
$THIS/../trav -step 1 $INC/search "$QSUB_5$UL1 -N j%n %Q$THIS/distTree_inc_search_init.sh $INC %f%Q > /dev/null"
$THIS/../qstat_wait.sh 2000 1
if false; then # This is done by distTree_inc_search_init.sh
ls $INC/search/*/request | sed 's|/request$||1' | sed 's|^.*/||1' > $INC/sought.list
# Will break if "No such file or directory"
$THIS/../setMinus $INC/search.list $INC/sought.list > $INC/missed.list
rm $INC/sought.list
if [ -s $INC/missed.list ]; then
wc -l $INC/missed.list
$THIS/distTree_inc_new_cmd.sh $INC "touch" $INC/missed.list
fi
rm $INC/missed.list
fi
fi
$THIS/distTree_inc_new_log.sh $INC $GRID
fi
rm $INC/search.list


ITER=0
Expand All @@ -120,10 +112,9 @@ while [ $ITER -le $ITER_MAX ]; do
fi

ITER=$(( $ITER + 1 ))
echo ""
echo "Iteration $ITER / $ITER_MAX ..."
section "Iteration $ITER / $ITER_MAX ..."
# use distTree_inc_request2dissim.sh ??
REQ=`$THIS/../trav $INC/search "wc -l %d/%f/request" | cut -f 1 -d ' ' | $THIS/../dm/count | grep -w '^sum' | cut -f 2`
REQ=`$THIS/../trav $INC/search "cat %d/%f/request" | wc -l`
echo "# Requests: $REQ"
GRID=1
if [ $REQ -lt $GRID_MIN ]; then
Expand All @@ -145,36 +136,16 @@ while [ $ITER -le $ITER_MAX ]; do
$THIS/../qstat_wait.sh $WAIT 0
fi

ls $INC/log > $INC/log.list-all
ls $INC/search > $INC/search.list
$THIS/../setIntersect.sh $INC/log.list-all $INC/search.list 0 > $INC/log.list
rm $INC/log.list-all
rm $INC/search.list
L=`cat $INC/log.list | wc -l`
if [ $L -gt 0 ]; then
echo "# Failed tasks: $L"
if [ $GRID == 0 ]; then
exit 1
fi
# Try to fix grid problems
$THIS/../trav $INC/log.list "$THIS/distTree_inc_unsearch.sh $INC %f"
$THIS/../trav $INC/log "echo ''; echo %d/%f; tail -20 %d/%f" > $INC/log.out # PAR
head -21 $INC/log.out # PAR
rm $INC/log.out
fi
rm $INC/log.list

rm -r $INC/log/

$THIS/distTree_inc_new_log.sh $INC $GRID

$THIS/../trav -step 1 -threads 15 $INC/search "$THIS/distTree_inc_search2bad.sh $INC %f"

echo "Processing new objects ..."
$THIS/distTree_new $QC $INC/ -variance $VARIANCE
done


echo ""
echo "leaf, dissim.add -> tree, dissim ..."
section "leaf, dissim.add -> tree, dissim ..."

wc -l $INC/dissim.add
cat $INC/dissim.add >> $INC/dissim
Expand Down Expand Up @@ -219,42 +190,37 @@ cp /dev/null $INC/leaf
mv $INC/tree.new $INC/tree

if [ -s $INC/hist/leaf.$VER ]; then
echo ""
echo "Database: new -> tree ..."
section "Database: new -> tree ..."
cut -f 1 $INC/hist/leaf.$VER | sort > $INC/leaf.list
$INC/objects_in_tree.sh $INC/leaf.list 1
rm $INC/leaf.list
fi

if [ -e $INC/outlier-genogroup ]; then
echo ""
#echo "Database: genogroup outliers ..."
section "Database: genogroup outliers ..."
wc -l $INC/outlier-genogroup
$INC/objects_in_tree.sh $INC/outlier-genogroup null
mv $INC/outlier-genogroup $INC/hist/outlier-genogroup.$VER
fi

if [ -e $INC/outlier-criterion ]; then
echo ""
#echo "Database: criterion outliers ..."
section "Database: criterion outliers ..."
wc -l $INC/outlier-criterion
$INC/objects_in_tree.sh $INC/outlier-criterion null
$THIS/../trav $INC/outlier-criterion "$INC/outlier2db.sh %f criterion"
mv $INC/outlier-criterion $INC/hist/outlier-criterion.$VER
fi

if [ -e $INC/outlier-deformation ]; then
echo ""
#echo "Database: deformation outliers ..."
section "Database: deformation outliers ..."
wc -l $INC/outlier-deformation
$INC/objects_in_tree.sh $INC/outlier-deformation null
$THIS/../trav $INC/outlier-deformation "$INC/outlier2db.sh %f deformation"
mv $INC/outlier-deformation $INC/hist/outlier-deformation.$VER
fi

if [ "$HYBRIDNESS_MIN" != 0 ]; then
echo ""
echo "Hybrid ..."
section "Hybrid ..."
$THIS/distTree_inc_hybrid.sh $INC
#echo "Unhybrid ..."
#$THIS/distTree_inc_unhybrid.sh $INC
Expand All @@ -263,8 +229,7 @@ fi
# Must be the last database change in this script
GENOGROUP_BARRIER=`cat $INC/genogroup_barrier`
if [ "$GENOGROUP_BARRIER" != "NAN" ]; then
echo ""
echo "New genogroup outliers ..."
section "New genogroup outliers ..."
$THIS/tree2genogroup $INC/tree $GENOGROUP_BARRIER -genogroup_table $INC/genogroup_table
$INC/genogroup2db.sh $INC/genogroup_table > $INC/outlier-genogroup
mv $INC/genogroup_table $INC/hist/genogroup_table.$VER
Expand All @@ -277,7 +242,7 @@ if [ "$GENOGROUP_BARRIER" != "NAN" ]; then
fi


echo ""
section "Additional requests ..."
$THIS/distTree_inc_request2dissim.sh $INC $INC/dissim_request $INC/dissim.add-req
if [ -s $INC/dissim.add-req ]; then
grep -vwi nan $INC/dissim.add-req | grep -vwi inf >> $INC/dissim
Expand All @@ -289,14 +254,12 @@ rm $INC/dissim_request
$THIS/distTree_inc_tree1_quality.sh $INC


echo ""
echo "QC ..."
section "QC ..."
$INC/qc.sh go


echo ""
NEW=`$THIS/distTree_inc_new_list.sh $INC | wc -l`
echo "# New objects left: $NEW"
section "# New objects left: $NEW"
if [ $NEW == 0 ]; then
touch $INC/finished
else
Expand Down
34 changes: 34 additions & 0 deletions phylogeny/distTree_inc_new_log.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/bin/bash
# Undo the search for failed objects listed in <inc>/log/, then remove <inc>/log/.
#
# Arguments:
#   $1 - incremental distance tree directory (must contain log/ and search/)
#   $2 - whether the grid was used for the search step (0/1)
#
# An entry that is present in both <inc>/log/ and <inc>/search/ is counted as
# a failed task.
#   - GRID = 0: any failed task aborts with exit status 1; log.list and log/
#     are left in place (presumably for inspection - confirm with the caller).
#   - otherwise: each failed object is passed to distTree_inc_unsearch.sh and
#     the beginning of the collected log tails is printed.
#
# Exit status: 1 on wrong usage or on failed tasks with GRID = 0.
THIS=$(dirname -- "$0")
# NOTE(review): RED and NOCOLOR used below are presumably defined by
# bash_common.sh - confirm.
source "$THIS/../bash_common.sh"
if [ $# -ne 2 ]; then
  echo "Undo search for failed objects in #1/log/, remove #1/log"
  echo "#1: incremental distance tree directory"
  echo "#2: grid is used (0/1)"
  exit 1
fi
INC=$1
GRID=$2


# Names of the log files that still have a matching object in search/
ls "$INC/log" > "$INC/log.list-all"
ls "$INC/search" > "$INC/search.list"
# NOTE(review): the meaning of the trailing "0" argument of setIntersect.sh is
# not visible here - kept exactly as in the original call.
"$THIS/../setIntersect.sh" "$INC/log.list-all" "$INC/search.list" 0 > "$INC/log.list"
rm -- "$INC/log.list-all"
rm -- "$INC/search.list"

L=$(wc -l < "$INC/log.list")
if [ "$L" -gt 0 ]; then
  echo -e "${RED}# Failed tasks: $L${NOCOLOR}"
  if [ "$GRID" = 0 ]; then
    exit 1
  fi
  # Try to fix grid problems
  # Options are placed before the positional arguments, as in the other trav
  # invocations of the distTree_inc scripts.
  "$THIS/../trav" -step 1 "$INC/log.list" "$THIS/distTree_inc_unsearch.sh $INC %f"
  "$THIS/../trav" "$INC/log" "echo ''; echo %d/%f; tail -20 %d/%f" > "$INC/log.out" # PAR
  head -n 21 "$INC/log.out" # PAR
  rm -- "$INC/log.out"
fi
rm -- "$INC/log.list"

# ${INC:?} aborts instead of expanding to "/log/" if INC is empty
rm -r -- "${INC:?}/log/"

3 changes: 2 additions & 1 deletion version.inc
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#define VERSION "1.4.1"
#define VERSION "1.4.2"


// 1.4.2 01/28/2021 distTree_inc_new_log.sh
// 1.4.1 01/24/2021 inc/{large,request_closest_sql} /dev/null or absent
// 1.3.2 01/10/2021 GeneMark2CDS prints incomplete and ambiguous proteins as well
// 1.3.1 12/18/2020 inc/phen_large --> inc/large; inc/request_closest_sql
Expand Down

0 comments on commit 507b73f

Please sign in to comment.