-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwrapper_f50b4_countSitesPerIndividual_20210127.sh
55 lines (42 loc) · 1.95 KB
/
wrapper_f50b4_countSitesPerIndividual_20210127.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/bin/bash
#$ -l h_data=8G,h_vmem=10G,h_rt=23:00:00
#$ -wd <homedir>
#$ -o <homedir>/reports/Summary_stats/f50b4_countSitesPerIndividual_20210127.out.txt
#$ -e <homedir>/reports/Summary_stats/f50b4_countSitesPerIndividual_20210127.err.txt
#$ -m abe
# @version v1
# @script wrapper_f50b4_countSitesPerIndividual_20210127.sh
# @usage qsub -t 1-96 wrapper_f50b4_countSitesPerIndividual_20210127.sh
# @description Wrapper of calling the countSitesPerIndividual.py for the baleen_genomes dataset
# Author: Meixi Lin
# Date: Wed Jan 27 13:24:01 2021
# Adapted from wrapper_ALLregions_CDS_countSitesPerIndividual_20201228.sh
###########################################################
## import packages
# Validate the array-task index up front so a mis-submitted job (no "-t" range,
# or a non-numeric SGE_TASK_ID) stops immediately with a clear message instead
# of silently looking for a chunk "00" VCF.
if [[ ! "${SGE_TASK_ID:-}" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "ERROR: SGE_TASK_ID='${SGE_TASK_ID:-}' is not an integer; submit as an array job (see @usage: qsub -t 1-96)" >&2
    exit 1
fi
# Random delay of 0-119 s before doing any work
# NOTE(review): presumably staggers the start of the array tasks - confirm.
sleep $((RANDOM % 120))
conda activate gentools
# Strict mode is switched on only after the environment activation, as in the
# original ordering; "-u" is intentionally not added here.
set -eo pipefail
###########################################################
## def functions
###########################################################
## input variables
DATASET="f50b4"
REF="Minke"
CDSTYPE="filteredvcf"
# Two-digit, zero-padded task index used in the VCF file name, the output file
# name and the --contig argument. "10#" forces base-10 so a value with a
# leading zero (e.g. "08") is not rejected as invalid octal.
IDX=$(printf '%02d' "$((10#${SGE_TASK_ID}))")
## def variables
TODAY=$(date "+%Y%m%d")
# Project root (placeholder value kept as given; quoted so the assignment parses)
HOMEDIR="<homedir>"
WORKSCRIPT="${HOMEDIR}/scripts/Summary_stats/count_sites/countSitesPerIndividual.py"
# Commit that "master" of the scripts repository points to, logged for provenance.
# Under "set -e" a failing git call aborts the job here.
COMMITID=$(git --git-dir="${HOMEDIR}/scripts/.git" --work-tree="${HOMEDIR}/scripts" rev-parse master)
VCFDIR="${HOMEDIR}/baleen_genomes/filteredvcf/${DATASET}/${REF}"
# Output directory is date-stamped with the day the task runs
OUTDIR="${HOMEDIR}/Summary_stats/${DATASET}/${REF}/count_sites_${CDSTYPE}_${TODAY}"
VCF="JointCalls_${DATASET}_08_B_VariantFiltration_${IDX}.vcf.gz"
mkdir -p "${OUTDIR}"
###########################################################
## main
# first check the overall vcf.gz file
# printf '%s\n' is used instead of "echo -e" so backslashes in the interpolated
# values are printed literally.
printf '%s\n' "[$(date "+%Y-%m-%d %T")] JOB ID ${JOB_ID}.${SGE_TASK_ID}; Count sites for ${VCFDIR}/${VCF}; WORKSCRIPT=${WORKSCRIPT}; git commit id: ${COMMITID}"
python "${WORKSCRIPT}" \
    --vcf "${VCFDIR}/${VCF}" \
    --outfile "${OUTDIR}/${DATASET}_${REF}_${IDX}_sites_summary.txt" \
    --filter "PASS" \
    --contig "${IDX}"
printf '%s\n' "[$(date "+%Y-%m-%d %T")] JOB ID ${JOB_ID}.${SGE_TASK_ID} Done"
conda deactivate