Skip to content

Commit 01972d5

Browse files
cseeddanking
authored andcommitted
move from_spark, etc. to SparkBackend (hail-is#5019)
* move from_spark, etc. to SparkBackend * fixed bug * fixed bug
1 parent 0ea8c46 commit 01972d5

File tree

3 files changed

+27
-8
lines changed

3 files changed

+27
-8
lines changed

hail/python/hail/backend/backend.py

+16
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
from hail.expr.table_type import *
66
from hail.expr.matrix_type import *
77
from hail.ir.renderer import Renderer
8+
from hail.table import Table
89

10+
import pyspark
911

1012
class Backend(object):
1113
@abc.abstractmethod
@@ -43,6 +45,20 @@ def matrix_read_type(self, mir):
4345
jir = self._to_java_ir(mir)
4446
return tmatrix._from_java(jir.typ())
4547

48+
def from_spark(self, df, key):
49+
return Table._from_java(Env.hail().table.Table.fromDF(Env.hc()._jhc, df._jdf, key))
50+
51+
def to_spark(self, t, flatten):
52+
t = t.expand_types()
53+
if flatten:
54+
t = t.flatten()
55+
return pyspark.sql.DataFrame(t._jt.toDF(Env.hc()._jsql_context), Env.sql_context())
56+
57+
def to_pandas(self, t, flatten):
58+
return self.to_spark(t, flatten).toPandas()
59+
60+
def from_pandas(self, df, key):
61+
return Table.from_spark(Env.sql_context().createDataFrame(df), key)
4662

4763
class ServiceBackend(Backend):
4864
def __init__(self, host, port=80, scheme='http'):

hail/python/hail/table.py

+4-8
Original file line numberDiff line numberDiff line change
@@ -2493,8 +2493,7 @@ def from_spark(df, key=[]):
24932493
:class:`.Table`
24942494
Table constructed from the Spark SQL DataFrame.
24952495
"""
2496-
2497-
return Table._from_java(Env.hail().table.Table.fromDF(Env.hc()._jhc, df._jdf, key))
2496+
return Env.spark_backend('from_spark').from_spark(df, key)
24982497

24992498
@typecheck_method(flatten=bool)
25002499
def to_spark(self, flatten=True):
@@ -2513,10 +2512,7 @@ def to_spark(self, flatten=True):
25132512
:class:`.pyspark.sql.DataFrame`
25142513
25152514
"""
2516-
t = self.expand_types()
2517-
if flatten:
2518-
t = t.flatten()
2519-
return pyspark.sql.DataFrame(t._jt.toDF(Env.hc()._jsql_context), Env.sql_context())
2515+
return Env.spark_backend('to_spark').to_spark(self, flatten)
25202516

25212517
@typecheck_method(flatten=bool)
25222518
def to_pandas(self, flatten=True):
@@ -2536,7 +2532,7 @@ def to_pandas(self, flatten=True):
25362532
:class:`.pandas.DataFrame`
25372533
25382534
"""
2539-
return self.to_spark(flatten).toPandas()
2535+
return Env.spark_backend('to_pandas').to_pandas(self, flatten)
25402536

25412537
@staticmethod
25422538
@typecheck(df=pandas.DataFrame,
@@ -2560,7 +2556,7 @@ def from_pandas(df, key=[]):
25602556
-------
25612557
:class:`.Table`
25622558
"""
2563-
return Table.from_spark(Env.sql_context().createDataFrame(df), key)
2559+
return Env.spark_backend('from_pandas').from_pandas(df, key)
25642560

25652561
@typecheck_method(other=table_type, tolerance=nullable(numeric), absolute=bool)
25662562
def _same(self, other, tolerance=1e-6, absolute=False):

hail/python/hail/utils/java.py

+7
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,13 @@ def hc():
6565
def backend():
6666
return Env.hc()._backend
6767

68+
def spark_backend(op):
69+
b = Env.backend()
70+
if isinstance(b, hail.backend.SparkBackend):
71+
return b
72+
else:
73+
raise NotImplementedError(f"{b.__class__.__name__} doesn't support {op}, only SparkBackend")
74+
6875
@staticmethod
6976
def sql_context():
7077
return Env.hc()._sql_context

0 commit comments

Comments
 (0)