Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rewrite computation of max and min in dataframe editor #3353

Merged
merged 5 commits into from
Aug 17, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 64 additions & 53 deletions spyderlib/widgets/variableexplorer/dataframeeditor.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,27 @@
from spyderlib.widgets.variableexplorer.arrayeditor import get_idx_rect

# Supported Numbers and complex numbers
# _sup_nr / _sup_com hold the same tuples as REAL_NUMBER_TYPES /
# COMPLEX_NUMBER_TYPES under their earlier names; both spellings are still
# referenced in this file.
_sup_nr = (float, int, np.int64, np.int32)
_sup_com = (complex, np.complex64, np.complex128)
# Types treated as real numbers when computing per-column max/min
REAL_NUMBER_TYPES = (float, int, np.int64, np.int32)
# Types treated as complex numbers; these are ranked by absolute value
COMPLEX_NUMBER_TYPES = (complex, np.complex64, np.complex128)
# Used to convert bool instance to false since bool('False') will return True
_bool_false = ['false', '0']


# Limit at which dataframe is considered so large that it is loaded on demand
# The per-column max/min is only computed up front when
# rows * columns < LARGE_SIZE.
LARGE_SIZE = 5e5
# NOTE(review): LARGE_NROWS and LARGE_COLS are not used in the visible part of
# the file; presumably row/column thresholds for on-demand loading - confirm.
LARGE_NROWS = 1e5
LARGE_COLS = 60

# Background colours
# Numbers are coloured with
#   hue = MINHUE + HUERANGE * (vmax - value) / (vmax - vmin)
# and the fixed saturation, value and alpha below.
BACKGROUND_NUMBER_MINHUE = 0.66 # hue for largest number
BACKGROUND_NUMBER_HUERANGE = 0.33 # (hue for smallest) minus (hue for largest)
BACKGROUND_NUMBER_SATURATION = 0.7
BACKGROUND_NUMBER_VALUE = 1.0
BACKGROUND_NUMBER_ALPHA = 0.6
# Base colour for the index column and for cells in non-numerical columns;
# only the alpha channel differs between those cases.
BACKGROUND_NONNUMBER_COLOR = Qt.lightGray
BACKGROUND_INDEX_ALPHA = 0.8  # index column (column 0 of the model)
BACKGROUND_STRING_ALPHA = 0.05  # text strings in non-numerical columns
BACKGROUND_MISC_ALPHA = 0.3  # any other value in non-numerical columns


def bool_false_check(value):
"""
Expand All @@ -59,7 +70,8 @@ def bool_false_check(value):

def global_max(col_vals, index):
    """
    Returns the global maximum and minimum.

    col_vals holds one entry per column: either a (vmax, vmin) pair or
    None. None entries are skipped before the pairs are combined, so they
    never reach zip(). The index argument is accepted but not used.

    Returns a tuple (largest vmax, smallest vmin).

    Raises ValueError if col_vals contains no pair at all, because there
    is then nothing to unpack.
    """
    col_vals_without_None = [x for x in col_vals if x is not None]
    max_col, min_col = zip(*col_vals_without_None)
    return max(max_col), min(min_col)


Expand All @@ -82,12 +94,6 @@ def __init__(self, dataFrame, format="%.3g", parent=None):
self.total_cols = self.df.shape[1]
size = self.total_rows * self.total_cols

huerange = [.66, .99] # Hue
self.sat = .7 # Saturation
self.val = 1. # Value
self.alp = .6 # Alpha-channel
self.hue0 = huerange[0]
self.dhue = huerange[1]-huerange[0]
self.max_min_col = None
if size < LARGE_SIZE:
self.max_min_col_update()
Expand Down Expand Up @@ -115,34 +121,36 @@ def __init__(self, dataFrame, format="%.3g", parent=None):
self.cols_loaded = self.total_cols

def max_min_col_update(self):
    """
    Determines the maximum and minimum number in each column.

    The result is a list whose k-th entry is [vmax, vmin], where vmax and
    vmin denote the maximum and minimum of the k-th column (ignoring NaN).
    This list is stored in self.max_min_col.

    If the k-th column has a non-numerical dtype, then the k-th entry
    is set to None. If the dtype is complex, then compute the maximum and
    minimum of the absolute values. If vmax equals vmin, then vmin is
    decreased by one.

    If the dataframe has no rows, self.max_min_col is left untouched.
    """
    if self.df.shape[0] == 0:  # If no rows to compute max/min then return
        return
    self.max_min_col = []
    # items() yields (column label, Series) pairs; the label is not needed
    for dummy, col in self.df.items():
        if col.dtype in REAL_NUMBER_TYPES + COMPLEX_NUMBER_TYPES:
            if col.dtype in REAL_NUMBER_TYPES:
                vmax = col.max(skipna=True)
                vmin = col.min(skipna=True)
            else:
                # Complex values are ranked by their absolute value
                magnitude = col.abs()
                vmax = magnitude.max(skipna=True)
                vmin = magnitude.min(skipna=True)
            if vmax != vmin:
                max_min = [vmax, vmin]
            else:
                # Keep vmax - vmin non-zero: it is used as a divisor when
                # the background hue is computed
                max_min = [vmax, vmin - 1]
        else:
            max_min = None
        self.max_min_col.append(max_min)

def get_format(self):
"""Return current format"""
Expand Down Expand Up @@ -197,29 +205,31 @@ def get_bgcolor(self, index):
"""Background color depending on value"""
column = index.column()
if column == 0:
color = QColor(Qt.lightGray)
color.setAlphaF(.8)
color = QColor(BACKGROUND_NONNUMBER_COLOR)
color.setAlphaF(BACKGROUND_INDEX_ALPHA)
return color
if not self.bgcolor_enabled:
return
value = self.get_value(index.row(), column-1)
if isinstance(value, _sup_com):
color_func = abs
if self.max_min_col[column - 1] is None:
color = QColor(BACKGROUND_NONNUMBER_COLOR)
if is_text_string(value):
color.setAlphaF(BACKGROUND_STRING_ALPHA)
else:
color.setAlphaF(BACKGROUND_MISC_ALPHA)
else:
color_func = float
if isinstance(value, _sup_nr+_sup_com) and self.bgcolor_enabled:
if isinstance(value, COMPLEX_NUMBER_TYPES):
color_func = abs
else:
color_func = float
vmax, vmin = self.return_max(self.max_min_col, column-1)
hue = self.hue0 + self.dhue*(vmax-color_func(value)) / (vmax-vmin)
hue = (BACKGROUND_NUMBER_MINHUE + BACKGROUND_NUMBER_HUERANGE *
(vmax - color_func(value)) / (vmax - vmin))
hue = float(abs(hue))
if hue > 1:
hue = 1
color = QColor.fromHsvF(hue, self.sat, self.val, self.alp)
elif is_text_string(value):
color = QColor(Qt.lightGray)
color.setAlphaF(.05)
else:
color = QColor(Qt.lightGray)
color.setAlphaF(.3)
color = QColor.fromHsvF(hue, BACKGROUND_NUMBER_SATURATION,
BACKGROUND_NUMBER_VALUE, BACKGROUND_NUMBER_ALPHA)
return color

def get_value(self, row, column):
Expand Down Expand Up @@ -318,8 +328,9 @@ def setData(self, index, value, role=Qt.EditRole, change_type=None):
current_value = self.get_value(row, column-1)
if isinstance(current_value, bool):
val = bool_false_check(val)
if isinstance(current_value, ((bool,) + _sup_nr + _sup_com)) or \
is_text_string(current_value):
supported_types = (bool,) + REAL_NUMBER_TYPES + COMPLEX_NUMBER_TYPES
if (isinstance(current_value, supported_types) or
is_text_string(current_value)):
try:
self.df.iloc[row, column-1] = current_value.__class__(val)
except ValueError as e:
Expand Down
121 changes: 106 additions & 15 deletions spyderlib/widgets/variableexplorer/tests/test_dataframeeditor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,38 +7,56 @@
Tests for dataframeeditor.py
"""

from __future__ import division

# Third party imports
from pandas import DataFrame
from pandas import DataFrame, date_range
from qtpy.QtGui import QColor
import pytest

# Local imports
from spyderlib.widgets.variableexplorer import dataframeeditor
from spyderlib.widgets.variableexplorer.dataframeeditor import DataFrameModel

# Helper functions
def colorclose(color, hsva_expected):
    """
    Compares HSV values which are stored as 16-bit integers.

    Returns True if every component returned by color.getHsvF() is within
    2**-16 of the corresponding component of hsva_expected.
    """
    hsva_actual = color.getHsvF()
    return all(abs(a - b) <= 2 ** (-16)
               for (a, b) in zip(hsva_actual, hsva_expected))

def data(dfm, i, j):
    """Return dfm.data() for the model index at row i, column j."""
    return dfm.data(dfm.createIndex(i, j))

def bgcolor(dfm, i, j):
    """Return dfm.get_bgcolor() for the model index at row i, column j."""
    return dfm.get_bgcolor(dfm.createIndex(i, j))

# --- Tests
# -----------------------------------------------------------------------------

def test_dataframemodel_basic():
    """Check the model's dimensions and the text displayed in each cell."""
    df = DataFrame({'colA': [1, 3], 'colB': ['c', 'a']})
    dfm = DataFrameModel(df)
    assert dfm.rowCount() == 2
    # Three model columns for two dataframe columns
    assert dfm.columnCount() == 3
    assert data(dfm, 0, 0) == '0'
    assert data(dfm, 0, 1) == '1'
    assert data(dfm, 0, 2) == 'c'
    assert data(dfm, 1, 0) == '1'
    assert data(dfm, 1, 1) == '3'
    assert data(dfm, 1, 2) == 'a'

def test_dataframemodel_sort():
    """Check the displayed cells after sorting on model column 2."""
    df = DataFrame({'colA': [1, 3], 'colB': ['c', 'a']})
    dfm = DataFrameModel(df)
    dfm.sort(2)
    assert data(dfm, 0, 0) == '1'
    assert data(dfm, 0, 1) == '3'
    assert data(dfm, 0, 2) == 'a'
    assert data(dfm, 1, 0) == '0'
    assert data(dfm, 1, 1) == '1'
    assert data(dfm, 1, 2) == 'c'

def test_dataframemodel_sort_is_stable(): # cf. issue 3010
df = DataFrame([[2,14], [2,13], [2,16], [1,3], [2,9], [1,15], [1,17],
Expand All @@ -47,11 +65,84 @@ def test_dataframemodel_sort_is_stable(): # cf. issue 3010
dfm = DataFrameModel(df)
dfm.sort(2)
dfm.sort(1)
col2 = [dfm.data(dfm.createIndex(i, 2)) for i in range(len(df))]
col2 = [data(dfm, i, 2) for i in range(len(df))]
assert col2 == [str(x) for x in [1, 3, 4, 6, 11, 12, 15, 17,
2, 5, 7, 8, 9, 10, 13, 14, 16]]

def test_dataframemodel_max_min_col_update():
    """max_min_col holds [vmax, vmin] for each numerical column."""
    df = DataFrame([[1, 2.0], [2, 2.5], [3, 9.0]])
    dfm = DataFrameModel(df)
    assert dfm.max_min_col == [[3, 1], [9.0, 2.0]]

def test_dataframemodel_max_min_col_update_constant():
    """For a constant column, vmin is stored as vmax minus one."""
    df = DataFrame([[1, 2.0], [1, 2.0], [1, 2.0]])
    dfm = DataFrameModel(df)
    assert dfm.max_min_col == [[1, 0], [2.0, 1.0]]

def test_dataframemodel_with_timezone_aware_timestamps():  # cf. issue 2940
    """A column of timezone-aware timestamps gets a None max/min entry."""
    df = DataFrame([x] for x in date_range('20150101', periods=5, tz='UTC'))
    dfm = DataFrameModel(df)
    assert dfm.max_min_col == [None]

def test_dataframemodel_with_categories():  # cf. issue 3308
    """String and categorical columns get a None max/min entry."""
    df = DataFrame({"id": [1, 2, 3, 4, 5, 6],
                    "raw_grade": ['a', 'b', 'b', 'a', 'a', 'e']})
    df["grade"] = df["raw_grade"].astype("category")
    dfm = DataFrameModel(df)
    assert dfm.max_min_col == [[6, 1], None, None]

def test_dataframemodel_get_bgcolor_with_numbers():
    """
    Check the hue of numerical cells, scaled per column.

    The smallest value of a column gets hue h0 + dh and the largest gets
    h0; values in between are interpolated linearly.
    """
    df = DataFrame([[0, 10], [1, 20], [2, 40]])
    dfm = DataFrameModel(df)
    h0 = dataframeeditor.BACKGROUND_NUMBER_MINHUE
    dh = dataframeeditor.BACKGROUND_NUMBER_HUERANGE
    s = dataframeeditor.BACKGROUND_NUMBER_SATURATION
    v = dataframeeditor.BACKGROUND_NUMBER_VALUE
    a = dataframeeditor.BACKGROUND_NUMBER_ALPHA
    assert colorclose(bgcolor(dfm, 0, 1), (h0 + dh, s, v, a))
    assert colorclose(bgcolor(dfm, 1, 1), (h0 + 1 / 2 * dh, s, v, a))
    assert colorclose(bgcolor(dfm, 2, 1), (h0, s, v, a))
    assert colorclose(bgcolor(dfm, 0, 2), (h0 + dh, s, v, a))
    assert colorclose(bgcolor(dfm, 1, 2), (h0 + 2 / 3 * dh, s, v, a))
    assert colorclose(bgcolor(dfm, 2, 2), (h0, s, v, a))

def test_dataframemodel_get_bgcolor_with_numbers_using_global_max():
    """
    Check the hue of numerical cells after calling dfm.colum_avg(0).

    The expected hues are scaled against the range of the whole dataframe
    (0 to 40) rather than the range of each column.
    """
    df = DataFrame([[0, 10], [1, 20], [2, 40]])
    dfm = DataFrameModel(df)
    dfm.colum_avg(0)
    h0 = dataframeeditor.BACKGROUND_NUMBER_MINHUE
    dh = dataframeeditor.BACKGROUND_NUMBER_HUERANGE
    s = dataframeeditor.BACKGROUND_NUMBER_SATURATION
    v = dataframeeditor.BACKGROUND_NUMBER_VALUE
    a = dataframeeditor.BACKGROUND_NUMBER_ALPHA
    assert colorclose(bgcolor(dfm, 0, 1), (h0 + dh, s, v, a))
    assert colorclose(bgcolor(dfm, 1, 1), (h0 + 39 / 40 * dh, s, v, a))
    assert colorclose(bgcolor(dfm, 2, 1), (h0 + 38 / 40 * dh, s, v, a))
    assert colorclose(bgcolor(dfm, 0, 2), (h0 + 30 / 40 * dh, s, v, a))
    assert colorclose(bgcolor(dfm, 1, 2), (h0 + 20 / 40 * dh, s, v, a))
    assert colorclose(bgcolor(dfm, 2, 2), (h0, s, v, a))

def test_dataframemodel_get_bgcolor_for_index():
    """Column 0 uses the non-number colour with the index alpha."""
    df = DataFrame([[0]])
    dfm = DataFrameModel(df)
    base = QColor(dataframeeditor.BACKGROUND_NONNUMBER_COLOR)
    h, s, v, dummy = base.getHsvF()
    a = dataframeeditor.BACKGROUND_INDEX_ALPHA
    assert colorclose(bgcolor(dfm, 0, 0), (h, s, v, a))

def test_dataframemodel_get_bgcolor_with_string():
    """A text cell uses the non-number colour with the string alpha."""
    df = DataFrame([['xxx']])
    dfm = DataFrameModel(df)
    base = QColor(dataframeeditor.BACKGROUND_NONNUMBER_COLOR)
    h, s, v, dummy = base.getHsvF()
    a = dataframeeditor.BACKGROUND_STRING_ALPHA
    assert colorclose(bgcolor(dfm, 0, 1), (h, s, v, a))

def test_dataframemodel_get_bgcolor_with_object():
    """A non-text, non-number cell uses the miscellaneous alpha."""
    df = DataFrame([[None]])
    dfm = DataFrameModel(df)
    base = QColor(dataframeeditor.BACKGROUND_NONNUMBER_COLOR)
    h, s, v, dummy = base.getHsvF()
    a = dataframeeditor.BACKGROUND_MISC_ALPHA
    assert colorclose(bgcolor(dfm, 0, 1), (h, s, v, a))


# Allow running this test module directly as a script
if __name__ == "__main__":
    pytest.main()