Skip to content

Commit

Permalink
HiveHook fix get_pandas_df() failure when it tries to read an empty t…
Browse files Browse the repository at this point in the history
…able (#17777)
  • Loading branch information
iblaine authored Aug 30, 2021
1 parent 1d71441 commit da99c3f
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 9 deletions.
3 changes: 1 addition & 2 deletions airflow/providers/apache/hive/hooks/hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -1066,6 +1066,5 @@ def get_pandas_df( # type: ignore
:return: pandas.DateFrame
"""
res = self.get_results(hql, schema=schema, hive_conf=hive_conf)
df = pandas.DataFrame(res['data'], **kwargs)
df.columns = [c[0] for c in res['header']]
df = pandas.DataFrame(res['data'], columns=[c[0] for c in res['header']], **kwargs)
return df
9 changes: 8 additions & 1 deletion tests/providers/apache/hive/hooks/test_hive.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from airflow.utils.operator_helpers import AIRFLOW_VAR_NAME_FORMAT_MAPPING
from tests.test_utils.asserts import assert_equal_ignore_multiple_spaces
from tests.test_utils.mock_hooks import MockHiveCliHook, MockHiveServer2Hook
from tests.test_utils.mock_process import MockSubProcess
from tests.test_utils.mock_process import EmptyMockConnectionCursor, MockSubProcess

DEFAULT_DATE = timezone.datetime(2015, 1, 1)
DEFAULT_DATE_ISO = DEFAULT_DATE.isoformat()
Expand Down Expand Up @@ -687,6 +687,13 @@ def test_get_pandas_df(self):
hook.mock_cursor.execute.assert_any_call('set airflow.ctx.dag_owner=airflow')
hook.mock_cursor.execute.assert_any_call('set airflow.ctx.dag_email=test@airflow.com')

hook = MockHiveServer2Hook(connection_cursor=EmptyMockConnectionCursor())
query = f"SELECT * FROM {self.table}"

df = hook.get_pandas_df(query, schema=self.database)

assert len(df) == 0

def test_get_results_header(self):
hook = MockHiveServer2Hook()

Expand Down
23 changes: 17 additions & 6 deletions tests/test_utils/mock_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,14 +49,13 @@ def wait(self):
return


class MockConnectionCursor:
def __init__(self, *args, **kwargs):
class BaseMockConnectionCursor:
def __init__(self, **kwargs):
self.arraysize = None
self.description = [
('hive_server_hook.a', 'INT_TYPE', None, None, None, None, True),
('hive_server_hook.b', 'INT_TYPE', None, None, None, None, True),
]
self.iterable = [(1, 1), (2, 2)]
self.conn_exists = kwargs.get('exists', True)

def close(self):
Expand All @@ -68,13 +67,13 @@ def cursor(self):
def execute(self, values=None):
pass

def exists(self, *args):
def exists(self):
return self.conn_exists

def isfile(self, *args):
def isfile(self):
return True

def remove(self, *args):
def remove(self):
pass

def upload(self, local_filepath, destination_filepath):
Expand All @@ -85,3 +84,15 @@ def __next__(self):

def __iter__(self):
yield from self.iterable


class MockConnectionCursor(BaseMockConnectionCursor):
def __init__(self):
super().__init__()
self.iterable = [(1, 1), (2, 2)]


class EmptyMockConnectionCursor(BaseMockConnectionCursor):
def __init__(self):
super().__init__()
self.iterable = []

0 comments on commit da99c3f

Please sign in to comment.