feat: add support for assistant v2 log apis (#100)
* feat: add support for assistant v2 log apis

* fix: update travis file

* Update .travis.yml

* Update .travis.yml

* Update .travis.yml
zzhang13 authored May 21, 2021
1 parent 121a29b commit 2c8d7bb
Showing 4 changed files with 73 additions and 17 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
@@ -12,8 +12,8 @@ install:
 before_script:
 - pip3 install -r requirements.txt
 script:
-- pip3 install -U python-dotenv
-- tox
+- pip3 install python-dotenv
+#- travis_wait tox
 before_deploy:
 - pip3 install bumpversion pypandoc
 - sudo apt-get update
2 changes: 1 addition & 1 deletion requirements.txt
@@ -4,7 +4,7 @@ bokeh==2.0.0
 tqdm==4.43.0
 matplotlib==3.2.1
 XlsxWriter==1.2.8
-ibm-watson>=4.3.0
+ibm-watson>=5.1.0
 numpy==1.18.2
 requests>=2.18.4
 scikit-learn>=0.21.3
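
For context, a minimal sketch (not part of this commit) of constructing the two SDK clients that the upgraded ibm-watson>=5.1.0 dependency provides; the API key, service URL, and version dates below are placeholders:

# Sketch only: placeholder credentials, service URL, and version dates.
from ibm_cloud_sdk_core.authenticators import IAMAuthenticator
from ibm_watson import AssistantV1, AssistantV2

authenticator = IAMAuthenticator('YOUR_APIKEY')

# V1 client, used for the workspace-scoped v1 /logs endpoint.
sdk_v1_object = AssistantV1(version='2020-04-01', authenticator=authenticator)
sdk_v1_object.set_service_url('https://api.us-south.assistant.watson.cloud.ibm.com')

# V2 client, used for the assistant-scoped v2 /logs endpoint this commit adds support for.
sdk_v2_object = AssistantV2(version='2020-09-24', authenticator=authenticator)
sdk_v2_object.set_service_url('https://api.us-south.assistant.watson.cloud.ibm.com')
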
34 changes: 27 additions & 7 deletions src/assistant_improve_toolkit/computation_func.py
@@ -22,6 +22,8 @@
 import numpy as np
 from IPython.display import HTML
 from tqdm import tqdm
+import json
+import base64
 
 MAX_DISAMBIGUATION_LENGTH = 5
 MAX_MORE_OPTION_LENGTH = 5
@@ -100,11 +102,12 @@ def get_effective_df(df_tbot_raw, ineffective_intents, df_escalate_nodes, filter
                 if node_id in node_title_map:
                     node_id_visit_list[seq_id] = node_title_map[node_id]
 
-            node_stack_list = item['response_dialog_stack']
-            for stack_id, stack_item in enumerate(node_stack_list):
-                for key, value in stack_item.items():
-                    if value in node_title_map:
-                        stack_item[key] = node_title_map[value]
+            if 'response_dialog_stack' in item:
+                node_stack_list = item['response_dialog_stack']
+                for stack_id, stack_item in enumerate(node_stack_list):
+                    for key, value in stack_item.items():
+                        if value in node_title_map:
+                            stack_item[key] = node_title_map[value]
 
     ineffective_nodes = None
     if df_escalate_nodes.size > 0:
@@ -249,6 +252,13 @@ def chk_is_valid_node(node_ids, node_name, node_conditions, nodes):
     df_valid_nodes = df_valid_nodes.drop('Type', 1)
     return df_valid_nodes
 
+def extract_dialog_stack(payload):
+    res = []
+    if 'main skill' in payload:
+        if 'system' in payload['main skill']:
+            if 'state' in payload['main skill']['system']:
+                res = json.loads(base64.b64decode(payload['main skill']['system']['state']))['dialog_stack']
+    return res
 
 def format_data(df):
     """This function formats the log data from watson assistant by separating columns and changing datatypes
@@ -284,16 +294,22 @@ def format_data(df):
                      df2['response_context_system'].apply(pd.Series).add_prefix('response_')],
                     axis=1) # type: pd.DataFrame
 
+    if 'response_context_skills' in df3:
+        df3['response_dialog_stack'] = df3['response_context_skills'].apply(lambda x: extract_dialog_stack(x))
+
     if 'response_context_response_context_IntentStarted' in df3.columns \
             and 'response_context_response_context_IntentCompleted' in df3.columns:
         cols = ['log_id', 'response_timestamp', 'response_context_conversation_id', 'request_input', 'response_text',
                 'response_intents', 'response_entities', 'response_nodes_visited', 'response_dialog_request_counter',
-                'response_dialog_stack', 'response_dialog_turn_counter',
+                'response_dialog_turn_counter',
                 'response_context_response_context_IntentStarted', 'response_context_response_context_IntentCompleted']
     else:
         cols = ['log_id', 'response_timestamp', 'response_context_conversation_id', 'request_input', 'response_text',
                 'response_intents', 'response_entities', 'response_nodes_visited', 'response_dialog_request_counter',
-                'response_dialog_stack', 'response_dialog_turn_counter']
+                'response_dialog_turn_counter']
 
+    if 'response_dialog_stack' in df3.columns:
+        cols.append('response_dialog_stack')
+
     print('Extracting intents ...')
     # Select a few required columns
@@ -366,6 +382,10 @@ def format_data(df):
     df6['Date'] = [datetime.datetime.date(d) for d in df6['response.timestamp']] # extracting date from timestamp
     df6['Customer ID (must retain for delete)'] = '' # Adding a column to retain customer id
 
+    df6.loc[df6['response.output.nodes_visited_s'].isnull(), 'response.output.nodes_visited_s'] = df6.loc[
+        df6['response.output.nodes_visited_s'].isnull(), 'response.output.nodes_visited_s'].apply(
+        lambda x: [])
+
     print('Completed!')
     return df6
 
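
For illustration, extract_dialog_stack() above decodes the base64-encoded session state carried under the 'main skill' entry of a v2 log's skills context and returns its dialog_stack. A minimal sketch with a fabricated payload (the import path assumes the package is installed from this repository):

# Sketch only: a fabricated, minimal payload; real v2 log contexts carry many more fields.
import base64
import json

from assistant_improve_toolkit.computation_func import extract_dialog_stack

state = base64.b64encode(json.dumps({'dialog_stack': [{'dialog_node': 'root'}]}).encode('utf-8')).decode('utf-8')
payload = {'main skill': {'system': {'state': state}}}

print(extract_dialog_stack(payload))  # [{'dialog_node': 'root'}]
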
50 changes: 43 additions & 7 deletions src/assistant_improve_toolkit/watson_assistant_func.py
@@ -81,8 +81,7 @@ def get_assistant_definition(sdk_object, assistant_info, project=None, overwrite
     else:
         return None
 
-
-def _get_logs_from_api(sdk_object, workspace_id, log_filter, num_logs):
+def _get_logs_from_v1_api(sdk_object, workspace_id, log_filter, num_logs):
     log_list = list()
     try:
         current_cursor = None
@@ -119,7 +118,38 @@ def _get_logs_from_api(sdk_object, workspace_id, log_filter, num_logs):
     return log_list
 
 
-def get_logs(sdk_object, assistant_info, num_logs, filename, filters=None, project=None, overwrite=False):
+def _get_logs_from_v2_api(sdk_object, assistant_id, log_filter, num_logs):
+    log_list = list()
+    try:
+        current_cursor = None
+        while num_logs > 0:
+            logs_response = sdk_object.list_logs(
+                assistant_id=assistant_id,
+                page_limit=500,
+                cursor=current_cursor,
+                filter=log_filter
+            ).get_result()
+            min_num = min(num_logs, len(logs_response['logs']))
+            log_list.extend(logs_response['logs'][:min_num])
+            print('\r{} logs retrieved'.format(len(log_list)), end='')
+            num_logs = num_logs - min_num
+            current_cursor = None
+            # Check if there is another page of logs to be fetched
+            if 'pagination' in logs_response:
+                # Get the url from which logs are to fetched
+                if 'next_cursor' in logs_response['pagination']:
+                    current_cursor = logs_response['pagination']['next_cursor']
+                else:
+                    break
+    except Exception as ex:
+        traceback.print_tb(ex.__traceback__)
+        raise RuntimeError("Error getting logs using API. Please check if URL/credentials are correct.")
+
+    return log_list
+
+
+def get_logs(sdk_v1_object, sdk_v2_object, assistant_info, num_logs, filename, filters=None, project=None,
+             overwrite=False, version=1):
     """This function calls Watson Assistant API to retrieve logs, using pagination if necessary.
     The goal is to retrieve utterances (user inputs) from the logs.
     Parameters
@@ -173,10 +203,16 @@ def get_logs(sdk_object, assistant_info, num_logs, filename, filters=None, proje
     if skill_id is not None and len(skill_id) > 0:
         filters.append('workspace_id::{}'.format(skill_id))
 
-    logs = _get_logs_from_api(sdk_object=sdk_object,
-                              workspace_id=workspace_id,
-                              log_filter=','.join(filters),
-                              num_logs=num_logs)
+    if version == 1:
+        logs = _get_logs_from_v1_api(sdk_object=sdk_v1_object,
+                                     workspace_id=workspace_id,
+                                     log_filter=','.join(filters),
+                                     num_logs=num_logs)
+    elif version == 2:
+        logs = _get_logs_from_v2_api(sdk_object=sdk_v2_object,
+                                     assistant_id=assistant_id,
+                                     log_filter=','.join(filters),
+                                     num_logs=num_logs)
     print('\nLoaded {} logs'.format(len(logs)))
 
     if not file_exist or overwrite:
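
For illustration, a sketch of how the updated get_logs() might be called to fetch v2 logs, assuming sdk_v1_object/sdk_v2_object are AssistantV1/AssistantV2 clients and that assistant_info is a dict exposing workspace_id, assistant_id, and skill_id as the surrounding code suggests; the IDs, filename, and filter values are placeholders:

# Sketch only: placeholder IDs, filename, and filters; the shape of assistant_info is assumed.
from assistant_improve_toolkit.watson_assistant_func import get_logs

assistant_information = {'workspace_id': '', 'assistant_id': 'YOUR_ASSISTANT_ID', 'skill_id': ''}

logs = get_logs(sdk_v1_object=sdk_v1_object,          # AssistantV1 client (unused when version=2)
                sdk_v2_object=sdk_v2_object,          # AssistantV2 client
                assistant_info=assistant_information,
                num_logs=20000,
                filename='assistant_v2_logs.json',
                filters=['language::en'],
                version=2)
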
