Skip to content

Commit

Permalink
positivity zscore tests
Browse files Browse the repository at this point in the history
  • Loading branch information
agshruti12 committed Oct 28, 2024
1 parent d15c105 commit 148dda0
Show file tree
Hide file tree
Showing 5 changed files with 279 additions and 2 deletions.
11 changes: 11 additions & 0 deletions tests/data/cleaned_data/positivity_zscore_chats.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
conversation_num,speaker_nickname,message,expected_column,expected_value
I,1,I am enjoying the weather today!,positivity_zscore_chats,1.4346980995217895
I,2,Likewise it's beautiful.,positivity_zscore_chats,1.3862275774841195
I,3,I'm not a fan of the rain.,positivity_zscore_chats,-1.1105762978767422
I,1,I think it's enchanting.,positivity_zscore_chats,1.3393376638185834
I,3,Agree to disagree.,positivity_zscore_chats,-1.0295558496422406
J,1,This conversation is more neutral.,positivity_zscore_chats,-0.4050742076501151
J,2,How are you doing?,positivity_zscore_chats,-0.613670825442067
J,3,I am mainly studying today.,positivity_zscore_chats,-0.7371504869897708
J,1,Anything fun planned?,positivity_zscore_chats,0.5057873469055534
J,3,Mostly a trip to the library.,positivity_zscore_chats,-0.77002302012911
12 changes: 11 additions & 1 deletion tests/data/cleaned_data/test_chat_level.csv
Original file line number Diff line number Diff line change
Expand Up @@ -733,4 +733,14 @@ H,1,fantastic difficulty,dale_chall_classification,difficult
H,3,trying quote random,dale_chall_classification,easy
H,3,erase eraser errand every even dig dim dime dine computer,dale_chall_classification,medium
H,3,discover direction different,dale_chall_classification,easy
H,4,even this is magnificent! even this is magnificent!,dale_chall_classification,difficult
H,4,even this is magnificent! even this is magnificent!,dale_chall_classification,difficult
I,1,I am enjoying the weather today!,positivity_zscore_conversation,0.855868
I,2,Likewise it's beautiful.,positivity_zscore_conversation,0.8156
I,3,I'm not a fan of the rain.,positivity_zscore_conversation,-1.2577
I,1,I think it's enchanting.,positivity_zscore_conversation,0.7766
I,3,Agree to disagree.,positivity_zscore_conversation,-1.19044
J,1,This conversation is more neutral.,positivity_zscore_conversation,-0.002217
J,2,How are you doing?,positivity_zscore_conversation,-0.443621
J,3,I am mainly studying today.,positivity_zscore_conversation,-0.704912
J,1,Anything fun planned?,positivity_zscore_conversation,1.925224
J,3,Mostly a trip to the library.,positivity_zscore_conversation,-0.774473
236 changes: 236 additions & 0 deletions tests/ipython_notebooks/helper.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2771,6 +2771,242 @@
"print(first_person_regex)\n",
"re.findall(first_person_regex, \"hiii\")\n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>message</th>\n",
" <th>expected_value</th>\n",
" <th>positive_bert</th>\n",
" <th>positivity_zscore_chats</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>i am enjoying the weather today</td>\n",
" <td>0.855868</td>\n",
" <td>0.986509</td>\n",
" <td>1.434698</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>likewise its beautiful</td>\n",
" <td>0.815600</td>\n",
" <td>0.968142</td>\n",
" <td>1.386228</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>im not a fan of the rain</td>\n",
" <td>-1.257700</td>\n",
" <td>0.021985</td>\n",
" <td>-1.110576</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>i think its enchanting</td>\n",
" <td>0.776600</td>\n",
" <td>0.950373</td>\n",
" <td>1.339338</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>agree to disagree</td>\n",
" <td>-1.190440</td>\n",
" <td>0.052687</td>\n",
" <td>-1.029556</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>this conversation is more neutral</td>\n",
" <td>-0.002217</td>\n",
" <td>0.289333</td>\n",
" <td>-0.405074</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>how are you doing</td>\n",
" <td>-0.443621</td>\n",
" <td>0.210286</td>\n",
" <td>-0.613671</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>i am mainly studying today</td>\n",
" <td>-0.704912</td>\n",
" <td>0.163493</td>\n",
" <td>-0.737150</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>anything fun planned</td>\n",
" <td>1.925224</td>\n",
" <td>0.634501</td>\n",
" <td>0.505787</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>mostly a trip to the library</td>\n",
" <td>-0.774473</td>\n",
" <td>0.151037</td>\n",
" <td>-0.770023</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" message expected_value positive_bert \\\n",
"0 i am enjoying the weather today 0.855868 0.986509 \n",
"1 likewise its beautiful 0.815600 0.968142 \n",
"2 im not a fan of the rain -1.257700 0.021985 \n",
"3 i think its enchanting 0.776600 0.950373 \n",
"4 agree to disagree -1.190440 0.052687 \n",
"5 this conversation is more neutral -0.002217 0.289333 \n",
"6 how are you doing -0.443621 0.210286 \n",
"7 i am mainly studying today -0.704912 0.163493 \n",
"8 anything fun planned 1.925224 0.634501 \n",
"9 mostly a trip to the library -0.774473 0.151037 \n",
"\n",
" positivity_zscore_chats \n",
"0 1.434698 \n",
"1 1.386228 \n",
"2 -1.110576 \n",
"3 1.339338 \n",
"4 -1.029556 \n",
"5 -0.405074 \n",
"6 -0.613671 \n",
"7 -0.737150 \n",
"8 0.505787 \n",
"9 -0.770023 "
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import scipy.stats as stats\n",
"\n",
"pd.read_csv(\"./output/chat/test_positivity_chat_level.csv\")[['message', \"expected_value\", \"positive_bert\", 'positivity_zscore_chats']]\n",
"# pd.read_csv(\"./output/chat/test_positivity_chat_level.csv\")[[\"expected_value\"]]\n",
"\n",
"\n",
"# A,1,I am enjoying the weather today!,positivity_zscore_chats,0.855868\n",
"# A,2,Likewise it's beautiful.,positivity_zscore_chats,0.8156\n",
"# A,3,I'm not a fan of the rain.,positivity_zscore_chats,-1.2577\n",
"# A,1,I think it's enchanting.,positivity_zscore_chats,0.7766\n",
"# A,3,Agree to disagree.,positivity_zscore_chats,-1.19044\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv(\"./data/cleaned_data/positivity_zscore_chats.csv\")\n",
"df['expected_value'] = stats.zscore(pd.read_csv(\"./output/chat/test_positivity_chat_level.csv\")['positivity_zscore_chats'])\n",
"df.to_csv(\"./data/cleaned_data/positivity_zscore_chats.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>positive_bert</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.986509</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.968142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.021985</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0.950373</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0.052687</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" positive_bert\n",
"0 0.986509\n",
"1 0.968142\n",
"2 0.021985\n",
"3 0.950373\n",
"4 0.052687"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_csv(\"./output/chat/test_positivity_chat_level.csv\")[[\"positive_bert\"]]\n"
]
}
],
"metadata": {
Expand Down
19 changes: 19 additions & 0 deletions tests/run_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,28 @@
test_forward_flow_df = pd.read_csv(
    "data/cleaned_data/fflow.csv",
    encoding=chat_encoding['encoding'],
)
conv_complex_timestamps_df = pd.read_csv(
    "data/cleaned_data/test_conv_level_complex_timestamps.csv",
    encoding=chat_encoding['encoding'],
)

# Fixture for the positivity z-score tests; it is read with the same
# encoding as the other cleaned-data fixtures above.
positivity_zscore = pd.read_csv(
    "data/cleaned_data/positivity_zscore_chats.csv",
    encoding=chat_encoding['encoding'],
)


# TESTING DATASETS -------------------------------

# Build and run the featurizer over the positivity fixture. The chat-level
# CSV written here is the file tests/test_feature_metrics.py loads as
# "./output/chat/test_positivity_chat_level.csv".
test_positivity = FeatureBuilder(
    input_df=positivity_zscore,
    vector_directory="./vector_data/",
    output_file_path_chat_level="./output/chat/test_positivity_chat_level.csv",
    output_file_path_user_level="./output/user/test_positivity_user_level.csv",
    output_file_path_conv_level="./output/conv/test_positivity_conv_level.csv",
    # These features require vect_data, so they must be listed explicitly
    # in order to be calculated.
    custom_features=[
        "(BERT) Mimicry",
        "Moving Mimicry",
        "Forward Flow",
        "Discursive Diversity",
    ],
    turns=False,
    regenerate_vectors=True,
)
test_positivity.featurize()

testing_chat = FeatureBuilder(
input_df = chat_df,
vector_directory = "./vector_data/",
Expand Down
3 changes: 2 additions & 1 deletion tests/test_feature_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

# Chat-level feature outputs that the chat-level tests run against.
test_chat_df = pd.read_csv("./output/chat/test_chat_level_chat.csv")
test_info_exchange_zscore_df = pd.read_csv(
    "./output/chat/info_exchange_zscore_chats.csv")
# Output of the positivity z-score FeatureBuilder run.
test_pos = pd.read_csv("./output/chat/test_positivity_chat_level.csv")

# Stack all chat-level outputs row-wise in ONE concat. Concatenating the
# info-exchange frame in a separate earlier step as well would append its
# rows twice, so each frame appears exactly once here.
test_chat_df = pd.concat(
    [test_chat_df, test_info_exchange_zscore_df, test_pos], axis=0)
test_conv_df = pd.read_csv("./output/conv/test_conv_level_conv.csv")
test_chat_complex_df = pd.read_csv(
"./output/chat/test_chat_level_chat_complex.csv")
Expand Down

0 comments on commit 148dda0

Please sign in to comment.