Description
Current Behaviour
IndexError Traceback (most recent call last)
in
----> 1 profile_report.get_description()
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs)
1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs)
1032 check_argument_types(memo)
-> 1033 retval = func(*args, **kwargs)
1034 try:
1035 check_return_type(retval, memo)
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/profile_report.py in get_description(self)
315 Dict containing a description for each variable in the DataFrame.
316 """
--> 317 return self.description_set
318
319 def get_rejected_variables(self) -> set:
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs)
1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs)
1032 check_argument_types(memo)
-> 1033 retval = func(*args, **kwargs)
1034 try:
1035 check_return_type(retval, memo)
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/profile_report.py in description_set(self)
251 self.summarizer,
252 self.typeset,
--> 253 self._sample,
254 )
255 return self._description_set
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/describe.py in describe(config, df, summarizer, typeset, sample)
70 pbar.total += len(df.columns)
71 series_description = get_series_descriptions(
---> 72 config, df, summarizer, typeset, pbar
73 )
74
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
314 try:
--> 315 return func(*args, **kwargs)
316 except TypeError as ex:
317 raise DispatchError(f"Function {func.__code__}") from ex
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/summary_pandas.py in pandas_get_series_descriptions(config, df, summarizer, typeset, pbar)
98 with multiprocessing.pool.ThreadPool(pool_size) as executor:
99 for i, (column, description) in enumerate(
--> 100 executor.imap_unordered(multiprocess_1d, args)
101 ):
102 pbar.set_postfix_str(f"Describe variable:{column}")
~/anaconda3/envs/py3.7/lib/python3.7/multiprocessing/pool.py in next(self, timeout)
746 if success:
747 return value
--> 748 raise value
749
750 __next__ = next # XXX
~/anaconda3/envs/py3.7/lib/python3.7/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
119 job, i, func, args, kwds = task
120 try:
--> 121 result = (True, func(*args, **kwds))
122 except Exception as e:
123 if wrap_exception and func is not _helper_reraises_exception:
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/summary_pandas.py in multiprocess_1d(args)
77 """
78 column, series = args
---> 79 return column, describe_1d(config, series, summarizer, typeset)
80
81 pool_size = config.pool_size
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
314 try:
--> 315 return func(*args, **kwargs)
316 except TypeError as ex:
317 raise DispatchError(f"Function {func.__code__}") from ex
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/summary_pandas.py in pandas_describe_1d(config, series, summarizer, typeset)
55
56 typeset.type_schema[series.name] = vtype
---> 57 return summarizer.summarize(config, series, dtype=vtype)
58
59
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summarizer.py in summarize(self, config, series, dtype)
37 object:
38 """
---> 39 _, _, summary = self.handle(str(dtype), config, series, {"type": str(dtype)})
40 return summary
41
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in handle(self, dtype, *args, **kwargs)
60 funcs = self.mapping.get(dtype, [])
61 op = compose(funcs)
---> 62 return op(*args)
63
64
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
22
23 return func2
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
22
23 return func2
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
22
23 return func2
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x)
15 def func(f: Callable, g: Callable) -> Callable:
16 def func2(*x) -> Any:
---> 17 res = g(*x)
18 if type(res) == bool:
19 return f(*x)
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
314 try:
--> 315 return func(*args, **kwargs)
316 except TypeError as ex:
317 raise DispatchError(f"Function {func.__code__}") from ex
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summary_algorithms.py in inner(config, series, summary)
63 if not summary["hashable"]:
64 return config, series, summary
---> 65 return fn(config, series, summary)
66
67 return inner
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summary_algorithms.py in inner(config, series, summary)
80 series = series.dropna()
81
---> 82 return fn(config, series, summary)
83
84 return inner
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/describe_numeric_pandas.py in pandas_describe_numeric_1d(config, series, summary)
118
119 if chi_squared_threshold > 0.0:
--> 120 stats["chi_squared"] = chi_square(finite_values)
121
122 stats["range"] = stats["max"] - stats["min"]
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summary_algorithms.py in chi_square(values, histogram)
50 ) -> dict:
51 if histogram is None:
---> 52 histogram, _ = np.histogram(values, bins="auto")
53 return dict(chisquare(histogram)._asdict())
54
<__array_function__ internals> in histogram(*args, **kwargs)
~/anaconda3/envs/py3.7/lib/python3.7/site-packages/numpy/lib/histograms.py in histogram(a, bins, range, normed, weights, density)
854 # The index computation is not guaranteed to give exactly
855 # consistent results within ~1 ULP of the bin edges.
--> 856 decrement = tmp_a < bin_edges[indices]
857 indices[decrement] -= 1
858 # The last bin includes the right edge. The other bins do not.
IndexError: index -9223372036854775808 is out of bounds for axis 0 with size 2
Expected Behaviour
`profile_report.get_description()` should return the data profile for this table instead of raising an IndexError.
Data Description
SUM_TIMER_READ_WRITE
0 10950043000000000
Code that reproduces the bug
import pandas as pd
from ydata_profiling import ProfileReport
b = {'SUM_TIMER_READ_WRITE': [10950043000000000]}
table = pd.DataFrame.from_dict(b)
profile_report = ProfileReport(
table,
progress_bar=False,
infer_dtypes=False,
missing_diagrams=None,
correlations=None,
interactions=None,
# duplicates=None,
samples=None)
description = profile_report.get_description()
pandas-profiling version
v4.1.1
Dependencies
pandas==1.3.5
ydata-profiling==4.1.1
OS
Linux dsp-X299-WU8 5.15.0-69-generic #76~20.04.1-Ubuntu SMP Mon Mar 20 15:54:19 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux
Checklist
- There is not yet another bug report for this issue in the issue tracker
- The problem is reproducible from this bug report. This guide can help to craft a minimal bug report.
- The issue has not been resolved by the entries listed under Common Issues.