Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion dataprofiler/profilers/graph_profiler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Class and functions to calculate and profile properties of graph data."""
from __future__ import annotations

import importlib
import pickle
from collections import defaultdict
from datetime import datetime
Expand All @@ -10,6 +11,7 @@
import numpy as np
import pandas as pd
import scipy.stats as st
from packaging import version

from ..data_readers.graph_data import GraphData
from . import utils
Expand Down Expand Up @@ -391,6 +393,11 @@ def _get_continuous_distribution(
st.lognorm,
st.gamma,
]

scipy_gte_1_11_0 = version.parse(
importlib.metadata.version("scipy")
) >= version.parse("1.11.0")

for attribute in attributes:
if attribute in continuous_attributes:
data_as_list = self._attribute_data_as_list(graph, attribute)
Expand All @@ -401,7 +408,14 @@ def _get_continuous_distribution(

for distribution in distribution_candidates:
# compute fit, mle, kolmogorov-smirnov test to test fit, and pdf
fit = distribution.fit(df)

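# scipy 1.11.0 changed how lognorm.fit() handles the loc parameter,
# so on those versions fit() is called with superfit=True
# (presumably to keep the earlier fitting behavior -- confirm against scipy).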
if distribution == st.lognorm and scipy_gte_1_11_0:
fit = distribution.fit(df, superfit=True)

else:
fit = distribution.fit(df)
mle = distribution.nnlf(fit, df)

if mle <= best_mle:
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,10 @@ fastavro>=1.0.0.post1
python-snappy>=0.5.4
charset-normalizer>=1.3.6
psutil>=4.0.0
scipy>=1.4.1,<1.11.0
scipy>=1.4.1
requests>=2.28.1
networkx>=2.5.1
typing-extensions>=3.10.0.2
HLL>=2.0.3
datasketches>=4.1.0
packaging>=23.0