From 06ba03380c1f309309f3e7cb484d736b7d3df1c3 Mon Sep 17 00:00:00 2001
From: Zhe Zhang <43827532+zzhang13@users.noreply.github.com>
Date: Tue, 1 Sep 2020 18:09:50 -0400
Subject: [PATCH] fix: fix a bug when calculating coverage (#95)

---
 requirements.txt                                  |  4 ++--
 setup.py                                          |  3 +--
 src/assistant_improve_toolkit/computation_func.py | 13 ++++++++-----
 src/assistant_improve_toolkit/version.py          |  2 +-
 src/assistant_improve_toolkit/visualize_func.py   |  2 +-
 src/main/python/computation_func.py               | 11 +++++++----
 6 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 0fed5c0..bff1ec5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 ipython
-pandas==1.1.0
+pandas==1.0.3
 bokeh==2.0.0
 tqdm==4.43.0
 matplotlib==3.2.1
@@ -7,5 +7,5 @@ XlsxWriter==1.2.8
 ibm-watson>=4.3.0
 numpy==1.18.2
 requests>=2.18.4
-scikit-learn==0.22.2.post1
+scikit-learn>=0.21.3
 xlrd==1.2.0
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 6713911..dfa6119 100644
--- a/setup.py
+++ b/setup.py
@@ -51,7 +51,7 @@ def read_md(f):
     package_dir={'': 'src'},
     packages=setuptools.find_packages('src'),
     install_requires=[
-        'pandas>=1.0.3',
+        'pandas==1.0.3',
         'bokeh==2.0.0',
         'tqdm==4.43.0',
         'scikit-learn>=0.21.3',
@@ -60,7 +60,6 @@ def read_md(f):
         'ibm-watson>=4.3.0',
         'numpy>=1.18.2',
         'requests>=2.18.4',
-        'scikit-learn==0.22.2.post1',
         'xlrd==1.2.0'
     ],
 
diff --git a/src/assistant_improve_toolkit/computation_func.py b/src/assistant_improve_toolkit/computation_func.py
index a566e95..1e1c628 100644
--- a/src/assistant_improve_toolkit/computation_func.py
+++ b/src/assistant_improve_toolkit/computation_func.py
@@ -21,7 +21,7 @@
 import itertools
 import numpy as np
 from IPython.display import HTML
-from tqdm.notebook import tqdm
+from tqdm import tqdm
 
 MAX_DISAMBIGUATION_LENGTH = 5
 MAX_MORE_OPTION_LENGTH = 5
@@ -174,14 +174,17 @@ def get_coverage_df(df_tbot_raw, df_coverage_nodes, conf_threshold):
     df_tbot_raw['Not Covered cause'] = None
 
     # Filter all the valid dialog node ids for non-coverage
-    df_coverage_valid = df_coverage_nodes[df_coverage_nodes['Valid']]  # ['dialog_node'].tolist()
+    df_coverage_valid = df_coverage_nodes[df_coverage_nodes['Valid']]
+    df_coverage_valid_dict = dict()
+    for idx, row in df_coverage_nodes[df_coverage_nodes['Valid']].iterrows():
+        df_coverage_valid_dict[row['Node ID']] = {row['Node ID'], row['Node Name']}
 
     # (1) Mark all messages that hit any non-coverage node including but not limited to 'anything_else' as 'Not covered'
     #  and update the 'Not Covered cause' column
-    for node in df_coverage_valid['Node ID'].tolist():
-        cause = "'{}' node".format(df_coverage_valid.loc[df_coverage_valid['Node ID'] == node, 'Condition'].values[0])
+    for node_id, name_set in df_coverage_valid_dict.items():
+        cause = "'{}' node".format(df_coverage_valid.loc[df_coverage_valid['Node ID'] == node_id, 'Condition'].values[0])
         df_tbot_raw.loc[
-            (df_tbot_raw['response.output.nodes_visited_s'].apply(lambda x: bool(intersection(x, node)))), [
+            (df_tbot_raw['response.output.nodes_visited_s'].apply(lambda x: bool(intersection(x, name_set)))), [
                 'Covered', 'Not Covered cause']] = [False, cause]
 
     # (2) Mark all messages  that did not meet confidence threshold set as 'Not covered' and update the 'Not Covered
diff --git a/src/assistant_improve_toolkit/version.py b/src/assistant_improve_toolkit/version.py
index 59f94b2..64778fc 100644
--- a/src/assistant_improve_toolkit/version.py
+++ b/src/assistant_improve_toolkit/version.py
@@ -1 +1 @@
-__version__ = '__version__ = '1.1.4''
\ No newline at end of file
+__version__ = '1.1.4'
\ No newline at end of file
diff --git a/src/assistant_improve_toolkit/visualize_func.py b/src/assistant_improve_toolkit/visualize_func.py
index bcf15e5..72f20ab 100644
--- a/src/assistant_improve_toolkit/visualize_func.py
+++ b/src/assistant_improve_toolkit/visualize_func.py
@@ -382,7 +382,7 @@ def show_coverage_over_time(df_coverage, interval='day'):
             start_datetime -= delta
             end_datetime += delta
 
-        time_index_df = pd.DataFrame([dt for dt in datetime_range(start_datetime, end_datetime, delta)],
+        time_index_df = pd.DataFrame([dt for dt in coverage_time.response_datetime_interval],
                                      columns=['response_datetime_interval'])
 
         coverage_data = time_index_df.merge(coverage_time, how='left', on=['response_datetime_interval'])
diff --git a/src/main/python/computation_func.py b/src/main/python/computation_func.py
index 447e4a3..5567974 100644
--- a/src/main/python/computation_func.py
+++ b/src/main/python/computation_func.py
@@ -173,14 +173,17 @@ def get_coverage_df(df_tbot_raw, df_coverage_nodes, conf_threshold):
     df_tbot_raw['Not Covered cause'] = None
 
     # Filter all the valid dialog node ids for non-coverage
-    df_coverage_valid = df_coverage_nodes[df_coverage_nodes['Valid']]  # ['dialog_node'].tolist()
+    df_coverage_valid = df_coverage_nodes[df_coverage_nodes['Valid']]
+    df_coverage_valid_dict = dict()
+    for idx, row in df_coverage_nodes[df_coverage_nodes['Valid']].iterrows():
+        df_coverage_valid_dict[row['Node ID']] = {row['Node ID'], row['Node Name']}
 
     # (1) Mark all messages that hit any non-coverage node including but not limited to 'anything_else' as 'Not covered'
     #  and update the 'Not Covered cause' column
-    for node in df_coverage_valid['Node ID'].tolist():
-        cause = "'{}' node".format(df_coverage_valid.loc[df_coverage_valid['Node ID'] == node, 'Condition'].values[0])
+    for node_id, name_set in df_coverage_valid_dict.items():
+        cause = "'{}' node".format(df_coverage_valid.loc[df_coverage_valid['Node ID'] == node_id, 'Condition'].values[0])
         df_tbot_raw.loc[
-            (df_tbot_raw['response.output.nodes_visited_s'].apply(lambda x: bool(intersection(x, node)))), [
+            (df_tbot_raw['response.output.nodes_visited_s'].apply(lambda x: bool(intersection(x, name_set)))), [
                 'Covered', 'Not Covered cause']] = [False, cause]
 
     # (2) Mark all messages  that did not meet confidence threshold set as 'Not covered' and update the 'Not Covered