diff --git a/hail/hail-ci-build.sh b/hail/hail-ci-build.sh
index 98e9e7c768b..b1503118b2f 100644
--- a/hail/hail-ci-build.sh
+++ b/hail/hail-ci-build.sh
@@ -241,11 +241,9 @@ test_gcp() {
          --zip gs://hail-ci-0-1/temp/$SOURCE_SHA/$TARGET_SHA/hail.zip \
          --vep
 
-    time cluster submit ${CLUSTER_NAME} \
-         cluster-sanity-check.py
-
-    time cluster submit ${CLUSTER_NAME} \
-         cluster-vep-check.py
+    for script in python/cluster-tests/**.py; do
+        time cluster submit ${CLUSTER_NAME} $script
+    done
 
     time cluster stop ${CLUSTER_NAME} --async
     touch ${GCP_SUCCESS}
diff --git a/hail/python/cluster-tests/cluster-read-vcfs-check.py b/hail/python/cluster-tests/cluster-read-vcfs-check.py
new file mode 100644
index 00000000000..d9138a9f1ab
--- /dev/null
+++ b/hail/python/cluster-tests/cluster-read-vcfs-check.py
@@ -0,0 +1,22 @@
+import json
+import hail as hl
+
+gvcfs = ['gs://hail-ci/gvcfs/HG00096.g.vcf.gz',
+         'gs://hail-ci/gvcfs/HG00268.g.vcf.gz']
+hl.init(default_reference='GRCh38')
+parts = [
+    {'start': {'locus': {'contig': 'chr20', 'position': 17821257}},
+     'end': {'locus': {'contig': 'chr20', 'position': 18708366}},
+     'includeStart': True,
+     'includeEnd': True},
+    {'start': {'locus': {'contig': 'chr20', 'position': 18708367}},
+     'end': {'locus': {'contig': 'chr20', 'position': 19776611}},
+     'includeStart': True,
+     'includeEnd': True},
+    {'start': {'locus': {'contig': 'chr20', 'position': 19776612}},
+     'end': {'locus': {'contig': 'chr20', 'position': 21144633}},
+     'includeStart': True,
+     'includeEnd': True},
+]
+parts_str = json.dumps(parts)
+vcfs = hl.import_vcfs(gvcfs, parts_str)
diff --git a/hail/cluster-sanity-check.py b/hail/python/cluster-tests/cluster-sanity-check.py
similarity index 100%
rename from hail/cluster-sanity-check.py
rename to hail/python/cluster-tests/cluster-sanity-check.py
diff --git a/hail/cluster-vep-check.py b/hail/python/cluster-tests/cluster-vep-check.py
similarity index 100%
rename from hail/cluster-vep-check.py
rename to hail/python/cluster-tests/cluster-vep-check.py
diff --git a/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala b/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala
index 277e2bd46b8..9b99decc8e8 100644
--- a/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala
+++ b/hail/src/main/scala/is/hail/io/tabix/TabixReader.scala
@@ -95,7 +95,7 @@ object TabixReader {
 
 }
 
-class TabixReader(val filePath: String, private val idxFilePath: Option[String] = None) {
+class TabixReader(val filePath: String, private val hConf: hd.conf.Configuration, private val idxFilePath: Option[String] = None) {
   import TabixReader._
 
   val indexPath: String = idxFilePath match {
@@ -108,8 +108,6 @@ class TabixReader(val filePath: String, private val idxFilePath: Option[String]
     }
   }
 
-  private val hConf = HailContext.get.hadoopConf
-
   val index: Tabix = hConf.readFile(indexPath) { is =>
     var buf = new Array[Byte](4)
     is.read(buf, 0, 4) // read magic bytes "TBI\1"
diff --git a/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala b/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala
index 28a922b6ccb..efffe8e123d 100644
--- a/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala
+++ b/hail/src/main/scala/is/hail/io/vcf/LoadVCF.scala
@@ -1214,7 +1214,7 @@ case class VCFsReader(
     entryType = genotypeSignature)
 
   val partitions = {
-    val r = new TabixReader(file)
+    val r = new TabixReader(file, hConf)
     localRangeBounds.zipWithIndex.map { case (b, i) =>
       if (!(b.includesStart && b.includesEnd))
         fatal("range bounds must be inclusive")
diff --git a/hail/src/test/scala/is/hail/io/TabixSuite.scala b/hail/src/test/scala/is/hail/io/TabixSuite.scala
index 72a47e4a9ca..f2d35497dd6 100644
--- a/hail/src/test/scala/is/hail/io/TabixSuite.scala
+++ b/hail/src/test/scala/is/hail/io/TabixSuite.scala
@@ -17,8 +17,8 @@ class TabixSuite extends SparkSuite {
   val vcfGzFile = vcfFile + ".gz"
   val vcfGzTbiFile = vcfGzFile + ".tbi"
 
-  lazy val reader = new TabixReader(vcfGzFile)
   lazy val bcConf = hc.sc.broadcast(new SerializableHadoopConfiguration(hc.hadoopConf))
+  lazy val reader = new TabixReader(vcfGzFile, hc.hadoopConf)
 
   @BeforeTest def initialize() {
     hc // reference to initialize
@@ -96,7 +96,7 @@ class TabixSuite extends SparkSuite {
     val vcfFile = "src/test/resources/sample.vcf.bgz"
     val chr = "20"
     val htsjdkrdr = new HtsjdkTabixReader(vcfFile)
-    val hailrdr = new TabixReader(vcfFile)
+    val hailrdr = new TabixReader(vcfFile, hc.hadoopConf)
     val tid = hailrdr.chr2tid(chr)
 
     for ((start, end) <-