forked from aehrc/VariantSpark
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path: jvariant-spark
executable file
·80 lines (57 loc) · 2.21 KB
/
jvariant-spark
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/bin/bash
# Launcher for VariantSpark's Jupyter/pyspark integration (jvariant-spark).
set -e

# Resolve the script's parent directory (one level up from bin/).
# NOTE(review): this deliberately overwrites the shell-special PWD variable;
# the rest of the script reads ${PWD}, so the name is kept for compatibility —
# consider renaming to SCRIPT_HOME in a follow-up.
PWD=$(cd "$(dirname "$0")"/..; pwd)

# Print an error message to stderr and abort the script.
# Arguments: $1 - error message
# Outputs:   "ERROR: <message>" on stderr
# Returns:   never (exits with status 1)
fatal_error () {
  echo "ERROR: $1" 1>&2
  exit 1
}
WITH_HAIL=NO

# Parse the command line: the only recognized flag enables Hail integration;
# anything else is silently ignored (matches historical behavior).
while [[ $# -gt 0 ]]; do
  case $1 in
    -wh|--with-hail)
      WITH_HAIL=YES
      shift # past argument
      ;;
    *) # unknown option — intentionally ignored
      shift # past argument
      ;;
  esac
done

# Default VARSPARK_HOME to the resolved script home when not already set.
if [[ -z "${VARSPARK_HOME}" ]]; then
  VARSPARK_HOME="${PWD}"
fi

# Locate the VariantSpark assembly jar via the bundled helper script.
VS_ASSEMBLY_JAR="$("${VARSPARK_HOME}/bin/find-varspark-jar")"

# pyspark must be on PATH before we can launch anything.
[[ $(type -P "pyspark") ]] || fatal_error "\`pyspark\` cannot be found. Please make sure it's on your PATH."

SPARK_VERSION=$(spark-submit --version 2>&1 | grep -m 1 version | awk -F 'version ' '{print $2}')

# Put the varspark Python sources on PYTHONPATH: prefer the packaged zips,
# fall back to the source tree plus zips when running from a checkout.
if [[ -f "${PWD}/python/lib/varspark-src.zip" ]]; then
  export PYTHONPATH=$(printf ":%s" "${PWD}"/python/lib/*.zip)
fi
if [[ -f "${PWD}/python/varspark/__init__.py" ]]; then
  export PYTHONPATH=${PWD}/python$(printf ":%s" "${PWD}"/python/lib/*.zip)
fi

# Run Jupyter Notebook as the pyspark driver process.
export PYSPARK_DRIVER_PYTHON='jupyter'
export PYSPARK_DRIVER_PYTHON_OPTS='notebook'

if [[ "$WITH_HAIL" == "YES" ]]; then
  # BUG FIX: the original reused the "pyspark cannot be found" message here,
  # hiding the real problem (HAIL_HOME unset).
  [[ -n "${HAIL_HOME}" ]] || fatal_error "HAIL_HOME is not set. Please point it at your Hail installation."
  # The Hail jar lives in jars/ for a release install, build/libs/ for a
  # source build.
  if [[ -f "${HAIL_HOME}/jars/hail-all-spark.jar" ]]; then
    VS_HAIL_JAR="${HAIL_HOME}/jars/hail-all-spark.jar"
  elif [[ -f "${HAIL_HOME}/build/libs/hail-all-spark.jar" ]]; then
    VS_HAIL_JAR="${HAIL_HOME}/build/libs/hail-all-spark.jar"
  else
    # BUG FIX: the original called undefined `fatal`, which would fail with
    # "command not found" instead of printing this diagnostic.
    fatal_error "Cannot locate hail-all-spark.jar under ${HAIL_HOME}."
  fi
  export PYTHONPATH=$PYTHONPATH:"${HAIL_HOME}/python"
  echo "Hail jar: $VS_HAIL_JAR"
  pyspark \
    --driver-class-path "${VS_HAIL_JAR}" \
    --conf "spark.hadoop.io.compression.codecs=org.apache.hadoop.io.compress.DefaultCodec,is.hail.io.compress.BGzipCodec,org.apache.hadoop.io.compress.GzipCodec" \
    --conf "spark.sql.files.openCostInBytes=53687091200" \
    --conf "spark.sql.files.maxPartitionBytes=53687091200" \
    --conf "spark.executor.extraClassPath=${VS_HAIL_JAR}" \
    --conf "spark.sql.catalogImplementation=in-memory" \
    --jars "${VS_ASSEMBLY_JAR}"
else
  pyspark --jars "${VS_ASSEMBLY_JAR}" \
    --conf "spark.sql.catalogImplementation=in-memory"
fi