Skip to content

Commit

Permalink
fix: fix a bug when calculating coverage (#95)
Browse files Browse the repository at this point in the history
  • Loading branch information
zzhang13 authored Sep 1, 2020
1 parent 10babfe commit 06ba033
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 15 deletions.
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
ipython
-pandas==1.1.0
+pandas==1.0.3
bokeh==2.0.0
tqdm==4.43.0
matplotlib==3.2.1
XlsxWriter==1.2.8
ibm-watson>=4.3.0
numpy==1.18.2
requests>=2.18.4
-scikit-learn==0.22.2.post1
+scikit-learn>=0.21.3
xlrd==1.2.0
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def read_md(f):
package_dir={'': 'src'},
packages=setuptools.find_packages('src'),
install_requires=[
-'pandas>=1.0.3',
+'pandas==1.0.3',
'bokeh==2.0.0',
'tqdm==4.43.0',
'scikit-learn>=0.21.3',
Expand All @@ -60,7 +60,6 @@ def read_md(f):
'ibm-watson>=4.3.0',
'numpy>=1.18.2',
'requests>=2.18.4',
-'scikit-learn==0.22.2.post1',
'xlrd==1.2.0'
],

Expand Down
13 changes: 8 additions & 5 deletions src/assistant_improve_toolkit/computation_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import itertools
import numpy as np
from IPython.display import HTML
-from tqdm.notebook import tqdm
+from tqdm import tqdm

MAX_DISAMBIGUATION_LENGTH = 5
MAX_MORE_OPTION_LENGTH = 5
Expand Down Expand Up @@ -174,14 +174,17 @@ def get_coverage_df(df_tbot_raw, df_coverage_nodes, conf_threshold):
df_tbot_raw['Not Covered cause'] = None

# Filter all the valid dialog node ids for non-coverage
-df_coverage_valid = df_coverage_nodes[df_coverage_nodes['Valid']] # ['dialog_node'].tolist()
+df_coverage_valid = df_coverage_nodes[df_coverage_nodes['Valid']]
+df_coverage_valid_dict = dict()
+for idx, row in df_coverage_nodes[df_coverage_nodes['Valid']].iterrows():
+df_coverage_valid_dict[row['Node ID']] = {row['Node ID'], row['Node Name']}

# (1) Mark all messages that hit any non-coverage node including but not limited to 'anything_else' as 'Not covered'
# and update the 'Not Covered cause' column
-for node in df_coverage_valid['Node ID'].tolist():
-cause = "'{}' node".format(df_coverage_valid.loc[df_coverage_valid['Node ID'] == node, 'Condition'].values[0])
+for node_id, name_set in df_coverage_valid_dict.items():
+cause = "'{}' node".format(df_coverage_valid.loc[df_coverage_valid['Node ID'] == node_id, 'Condition'].values[0])
df_tbot_raw.loc[
-(df_tbot_raw['response.output.nodes_visited_s'].apply(lambda x: bool(intersection(x, node)))), [
+(df_tbot_raw['response.output.nodes_visited_s'].apply(lambda x: bool(intersection(x, name_set)))), [
'Covered', 'Not Covered cause']] = [False, cause]

# (2) Mark all messages that did not meet confidence threshold set as 'Not covered' and update the 'Not Covered
Expand Down
2 changes: 1 addition & 1 deletion src/assistant_improve_toolkit/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-__version__ = '__version__ = '1.1.4''
+__version__ = '1.1.4'
2 changes: 1 addition & 1 deletion src/assistant_improve_toolkit/visualize_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def show_coverage_over_time(df_coverage, interval='day'):
start_datetime -= delta
end_datetime += delta

-time_index_df = pd.DataFrame([dt for dt in datetime_range(start_datetime, end_datetime, delta)],
+time_index_df = pd.DataFrame([dt for dt in coverage_time.response_datetime_interval],
columns=['response_datetime_interval'])

coverage_data = time_index_df.merge(coverage_time, how='left', on=['response_datetime_interval'])
Expand Down
11 changes: 7 additions & 4 deletions src/main/python/computation_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,14 +173,17 @@ def get_coverage_df(df_tbot_raw, df_coverage_nodes, conf_threshold):
df_tbot_raw['Not Covered cause'] = None

# Filter all the valid dialog node ids for non-coverage
-df_coverage_valid = df_coverage_nodes[df_coverage_nodes['Valid']] # ['dialog_node'].tolist()
+df_coverage_valid = df_coverage_nodes[df_coverage_nodes['Valid']]
+df_coverage_valid_dict = dict()
+for idx, row in df_coverage_nodes[df_coverage_nodes['Valid']].iterrows():
+df_coverage_valid_dict[row['Node ID']] = {row['Node ID'], row['Node Name']}

# (1) Mark all messages that hit any non-coverage node including but not limited to 'anything_else' as 'Not covered'
# and update the 'Not Covered cause' column
-for node in df_coverage_valid['Node ID'].tolist():
-cause = "'{}' node".format(df_coverage_valid.loc[df_coverage_valid['Node ID'] == node, 'Condition'].values[0])
+for node_id, name_set in df_coverage_valid_dict.items():
+cause = "'{}' node".format(df_coverage_valid.loc[df_coverage_valid['Node ID'] == node_id, 'Condition'].values[0])
df_tbot_raw.loc[
-(df_tbot_raw['response.output.nodes_visited_s'].apply(lambda x: bool(intersection(x, node)))), [
+(df_tbot_raw['response.output.nodes_visited_s'].apply(lambda x: bool(intersection(x, name_set)))), [
'Covered', 'Not Covered cause']] = [False, cause]

# (2) Mark all messages that did not meet confidence threshold set as 'Not covered' and update the 'Not Covered
Expand Down

0 comments on commit 06ba033

Please sign in to comment.