Skip to content

index -9223372036854775808 is out of bounds for axis 0 with size 2 #1313

Open
@zhoujianch

Description

Current Behaviour

IndexError Traceback (most recent call last)
in
----> 1 profile_report.get_description()

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs)
1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs)
1032 check_argument_types(memo)
-> 1033 retval = func(*args, **kwargs)
1034 try:
1035 check_return_type(retval, memo)

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/profile_report.py in get_description(self)
315 Dict containing a description for each variable in the DataFrame.
316 """
--> 317 return self.description_set
318
319 def get_rejected_variables(self) -> set:

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/typeguard/__init__.py in wrapper(*args, **kwargs)
1031 memo = _CallMemo(python_func, _localns, args=args, kwargs=kwargs)
1032 check_argument_types(memo)
-> 1033 retval = func(*args, **kwargs)
1034 try:
1035 check_return_type(retval, memo)

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/profile_report.py in description_set(self)
251 self.summarizer,
252 self.typeset,
--> 253 self._sample,
254 )
255 return self._description_set

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/describe.py in describe(config, df, summarizer, typeset, sample)
70 pbar.total += len(df.columns)
71 series_description = get_series_descriptions(
---> 72 config, df, summarizer, typeset, pbar
73 )
74

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
314 try:
--> 315 return func(*args, **kwargs)
316 except TypeError as ex:
317 raise DispatchError(f"Function {func.__code__}") from ex

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/summary_pandas.py in pandas_get_series_descriptions(config, df, summarizer, typeset, pbar)
98 with multiprocessing.pool.ThreadPool(pool_size) as executor:
99 for i, (column, description) in enumerate(
--> 100 executor.imap_unordered(multiprocess_1d, args)
101 ):
102 pbar.set_postfix_str(f"Describe variable:{column}")

~/anaconda3/envs/py3.7/lib/python3.7/multiprocessing/pool.py in next(self, timeout)
746 if success:
747 return value
--> 748 raise value
749
750 __next__ = next # XXX

~/anaconda3/envs/py3.7/lib/python3.7/multiprocessing/pool.py in worker(inqueue, outqueue, initializer, initargs, maxtasks, wrap_exception)
119 job, i, func, args, kwds = task
120 try:
--> 121 result = (True, func(*args, **kwds))
122 except Exception as e:
123 if wrap_exception and func is not _helper_reraises_exception:

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/summary_pandas.py in multiprocess_1d(args)
77 """
78 column, series = args
---> 79 return column, describe_1d(config, series, summarizer, typeset)
80
81 pool_size = config.pool_size

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
314 try:
--> 315 return func(*args, **kwargs)
316 except TypeError as ex:
317 raise DispatchError(f"Function {func.__code__}") from ex

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/summary_pandas.py in pandas_describe_1d(config, series, summarizer, typeset)
55
56 typeset.type_schema[series.name] = vtype
---> 57 return summarizer.summarize(config, series, dtype=vtype)
58
59

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summarizer.py in summarize(self, config, series, dtype)
37 object:
38 """
---> 39 _, _, summary = self.handle(str(dtype), config, series, {"type": str(dtype)})
40 return summary
41

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in handle(self, dtype, *args, **kwargs)
60 funcs = self.mapping.get(dtype, [])
61 op = compose(funcs)
---> 62 return op(*args)
63
64

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
22
23 return func2

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
22
23 return func2

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x)
19 return f(*x)
20 else:
---> 21 return f(*res)
22
23 return func2

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/handler.py in func2(*x)
15 def func(f: Callable, g: Callable) -> Callable:
16 def func2(*x) -> Any:
---> 17 res = g(*x)
18 if type(res) == bool:
19 return f(*x)

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/multimethod/__init__.py in __call__(self, *args, **kwargs)
313 func = self[tuple(func(arg) for func, arg in zip(self.type_checkers, args))]
314 try:
--> 315 return func(*args, **kwargs)
316 except TypeError as ex:
317 raise DispatchError(f"Function {func.__code__}") from ex

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summary_algorithms.py in inner(config, series, summary)
63 if not summary["hashable"]:
64 return config, series, summary
---> 65 return fn(config, series, summary)
66
67 return inner

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summary_algorithms.py in inner(config, series, summary)
80 series = series.dropna()
81
---> 82 return fn(config, series, summary)
83
84 return inner

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/pandas/describe_numeric_pandas.py in pandas_describe_numeric_1d(config, series, summary)
118
119 if chi_squared_threshold > 0.0:
--> 120 stats["chi_squared"] = chi_square(finite_values)
121
122 stats["range"] = stats["max"] - stats["min"]

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/ydata_profiling/model/summary_algorithms.py in chi_square(values, histogram)
50 ) -> dict:
51 if histogram is None:
---> 52 histogram, _ = np.histogram(values, bins="auto")
53 return dict(chisquare(histogram)._asdict())
54

<__array_function__ internals> in histogram(*args, **kwargs)

~/anaconda3/envs/py3.7/lib/python3.7/site-packages/numpy/lib/histograms.py in histogram(a, bins, range, normed, weights, density)
854 # The index computation is not guaranteed to give exactly
855 # consistent results within ~1 ULP of the bin edges.
--> 856 decrement = tmp_a < bin_edges[indices]
857 indices[decrement] -= 1
858 # The last bin includes the right edge. The other bins do not.

IndexError: index -9223372036854775808 is out of bounds for axis 0 with size 2

Expected Behaviour

`get_description()` should return the data profiling description for this table without raising an error.

Data Description

SUM_TIMER_READ_WRITE
0 10950043000000000

Code that reproduces the bug

# Minimal reproduction script for the IndexError shown in the traceback above.
import pandas as pd
from ydata_profiling import ProfileReport

# One column holding a single large integer value (one row in total).
b = {'SUM_TIMER_READ_WRITE': [10950043000000000]}
table = pd.DataFrame.from_dict(b)

# NOTE(review): the None arguments presumably switch off the corresponding
# report sections -- confirm against the ydata-profiling documentation.
profile_report = ProfileReport(
            table,
            progress_bar=False,
            infer_dtypes=False,
            missing_diagrams=None,
            correlations=None,
            interactions=None,
            # duplicates=None,
            samples=None)
# This is the call at the top of the reported traceback; it ends in the
# IndexError raised inside numpy's histogram.
description = profile_report.get_description()

pandas-profiling version

v4.1.1

Dependencies

pandas==1.3.5
ydata-profiling==4.1.1

OS

Linux dsp-X299-WU8 5.15.0-69-generic #76~20.04.1-Ubuntu SMP Mon Mar 20 15:54:19 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux

Checklist

  • There is not yet another bug report for this issue in the issue tracker
  • The problem is reproducible from this bug report. This guide can help to craft a minimal bug report.
  • The issue has not been resolved by the entries listed under Common Issues.

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions