From 148dda05ffd4bd618f1d41b1895879c971f8bc80 Mon Sep 17 00:00:00 2001 From: agshruti12 Date: Mon, 28 Oct 2024 17:39:28 -0400 Subject: [PATCH] positivity zscore tests --- .../cleaned_data/positivity_zscore_chats.csv | 11 + tests/data/cleaned_data/test_chat_level.csv | 12 +- tests/ipython_notebooks/helper.ipynb | 236 ++++++++++++++++++ tests/run_tests.py | 19 ++ tests/test_feature_metrics.py | 3 +- 5 files changed, 279 insertions(+), 2 deletions(-) create mode 100644 tests/data/cleaned_data/positivity_zscore_chats.csv diff --git a/tests/data/cleaned_data/positivity_zscore_chats.csv b/tests/data/cleaned_data/positivity_zscore_chats.csv new file mode 100644 index 00000000..211a7692 --- /dev/null +++ b/tests/data/cleaned_data/positivity_zscore_chats.csv @@ -0,0 +1,11 @@ +conversation_num,speaker_nickname,message,expected_column,expected_value +I,1,I am enjoying the weather today!,positivity_zscore_chats,1.4346980995217895 +I,2,Likewise it's beautiful.,positivity_zscore_chats,1.3862275774841195 +I,3,I'm not a fan of the rain.,positivity_zscore_chats,-1.1105762978767422 +I,1,I think it's enchanting.,positivity_zscore_chats,1.3393376638185834 +I,3,Agree to disagree.,positivity_zscore_chats,-1.0295558496422406 +J,1,This conversation is more neutral.,positivity_zscore_chats,-0.4050742076501151 +J,2,How are you doing?,positivity_zscore_chats,-0.613670825442067 +J,3,I am mainly studying today.,positivity_zscore_chats,-0.7371504869897708 +J,1,Anything fun planned?,positivity_zscore_chats,0.5057873469055534 +J,3,Mostly a trip to the library.,positivity_zscore_chats,-0.77002302012911 diff --git a/tests/data/cleaned_data/test_chat_level.csv b/tests/data/cleaned_data/test_chat_level.csv index 94fb7f45..4ffe486b 100644 --- a/tests/data/cleaned_data/test_chat_level.csv +++ b/tests/data/cleaned_data/test_chat_level.csv @@ -733,4 +733,14 @@ H,1,fantastic difficulty,dale_chall_classification,difficult H,3,trying quote random,dale_chall_classification,easy H,3,erase eraser errand every even dig dim dime dine computer,dale_chall_classification,medium H,3,discover direction different,dale_chall_classification,easy -H,4,even this is magnificent! even this is magnificent!,dale_chall_classification,difficult \ No newline at end of file +H,4,even this is magnificent! even this is magnificent!,dale_chall_classification,difficult +I,1,I am enjoying the weather today!,positivity_zscore_conversation,0.855868 +I,2,Likewise it's beautiful.,positivity_zscore_conversation,0.8156 +I,3,I'm not a fan of the rain.,positivity_zscore_conversation,-1.2577 +I,1,I think it's enchanting.,positivity_zscore_conversation,0.7766 +I,3,Agree to disagree.,positivity_zscore_conversation,-1.19044 +J,1,This conversation is more neutral.,positivity_zscore_conversation,-0.002217 +J,2,How are you doing?,positivity_zscore_conversation,-0.443621 +J,3,I am mainly studying today.,positivity_zscore_conversation,-0.704912 +J,1,Anything fun planned?,positivity_zscore_conversation,1.925224 +J,3,Mostly a trip to the library.,positivity_zscore_conversation,-0.774473 \ No newline at end of file diff --git a/tests/ipython_notebooks/helper.ipynb b/tests/ipython_notebooks/helper.ipynb index b8667236..8f042a9b 100644 --- a/tests/ipython_notebooks/helper.ipynb +++ b/tests/ipython_notebooks/helper.ipynb @@ -2771,6 +2771,242 @@ "print(first_person_regex)\n", "re.findall(first_person_regex, \"hiii\")\n" ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
messageexpected_valuepositive_bertpositivity_zscore_chats
0i am enjoying the weather today0.8558680.9865091.434698
1likewise its beautiful0.8156000.9681421.386228
2im not a fan of the rain-1.2577000.021985-1.110576
3i think its enchanting0.7766000.9503731.339338
4agree to disagree-1.1904400.052687-1.029556
5this conversation is more neutral-0.0022170.289333-0.405074
6how are you doing-0.4436210.210286-0.613671
7i am mainly studying today-0.7049120.163493-0.737150
8anything fun planned1.9252240.6345010.505787
9mostly a trip to the library-0.7744730.151037-0.770023
\n", + "
" + ], + "text/plain": [ + " message expected_value positive_bert \\\n", + "0 i am enjoying the weather today 0.855868 0.986509 \n", + "1 likewise its beautiful 0.815600 0.968142 \n", + "2 im not a fan of the rain -1.257700 0.021985 \n", + "3 i think its enchanting 0.776600 0.950373 \n", + "4 agree to disagree -1.190440 0.052687 \n", + "5 this conversation is more neutral -0.002217 0.289333 \n", + "6 how are you doing -0.443621 0.210286 \n", + "7 i am mainly studying today -0.704912 0.163493 \n", + "8 anything fun planned 1.925224 0.634501 \n", + "9 mostly a trip to the library -0.774473 0.151037 \n", + "\n", + " positivity_zscore_chats \n", + "0 1.434698 \n", + "1 1.386228 \n", + "2 -1.110576 \n", + "3 1.339338 \n", + "4 -1.029556 \n", + "5 -0.405074 \n", + "6 -0.613671 \n", + "7 -0.737150 \n", + "8 0.505787 \n", + "9 -0.770023 " + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import scipy.stats as stats\n", + "\n", + "pd.read_csv(\"./output/chat/test_positivity_chat_level.csv\")[['message', \"expected_value\", \"positive_bert\", 'positivity_zscore_chats']]\n", + "# pd.read_csv(\"./output/chat/test_positivity_chat_level.csv\")[[\"expected_value\"]]\n", + "\n", + "\n", + "# A,1,I am enjoying the weather today!,positivity_zscore_chats,0.855868\n", + "# A,2,Likewise it's beautiful.,positivity_zscore_chats,0.8156\n", + "# A,3,I'm not a fan of the rain.,positivity_zscore_chats,-1.2577\n", + "# A,1,I think it's enchanting.,positivity_zscore_chats,0.7766\n", + "# A,3,Agree to disagree.,positivity_zscore_chats,-1.19044\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv(\"./data/cleaned_data/positivity_zscore_chats.csv\")\n", + "df['expected_value'] = stats.zscore(pd.read_csv(\"./output/chat/test_positivity_chat_level.csv\")['positivity_zscore_chats'])\n", + "df.to_csv(\"./data/cleaned_data/positivity_zscore_chats.csv\", index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
positive_bert
00.986509
10.968142
20.021985
30.950373
40.052687
\n", + "
" + ], + "text/plain": [ + " positive_bert\n", + "0 0.986509\n", + "1 0.968142\n", + "2 0.021985\n", + "3 0.950373\n", + "4 0.052687" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.read_csv(\"./output/chat/test_positivity_chat_level.csv\")[[\"positive_bert\"]]\n" + ] } ], "metadata": { diff --git a/tests/run_tests.py b/tests/run_tests.py index 3b8103a7..c15eaaba 100644 --- a/tests/run_tests.py +++ b/tests/run_tests.py @@ -29,9 +29,28 @@ test_forward_flow_df = pd.read_csv("data/cleaned_data/fflow.csv", encoding=chat_encoding['encoding']) conv_complex_timestamps_df = pd.read_csv("data/cleaned_data/test_conv_level_complex_timestamps.csv", encoding=chat_encoding['encoding']) + positivity_zscore = pd.read_csv("data/cleaned_data/positivity_zscore_chats.csv", encoding=chat_encoding['encoding']) + # TESTING DATASETS ------------------------------- + test_positivity = FeatureBuilder( + input_df = positivity_zscore, + vector_directory = "./vector_data/", + output_file_path_chat_level = "./output/chat/test_positivity_chat_level.csv", + output_file_path_user_level = "./output/user/test_positivity_user_level.csv", + output_file_path_conv_level = "./output/conv/test_positivity_conv_level.csv", + custom_features = [ # these require vect_data, so they now need to be explicitly included in order to calculate them + "(BERT) Mimicry", + "Moving Mimicry", + "Forward Flow", + "Discursive Diversity" + ], + turns = False, + regenerate_vectors = True + ) + test_positivity.featurize() + testing_chat = FeatureBuilder( input_df = chat_df, vector_directory = "./vector_data/", diff --git a/tests/test_feature_metrics.py b/tests/test_feature_metrics.py index e52e67d4..3289d20f 100644 --- a/tests/test_feature_metrics.py +++ b/tests/test_feature_metrics.py @@ -8,7 +8,8 @@ test_chat_df = pd.read_csv("./output/chat/test_chat_level_chat.csv") test_info_exchange_zscore_df = pd.read_csv("./output/chat/info_exchange_zscore_chats.csv") -test_chat_df = pd.concat([test_chat_df, test_info_exchange_zscore_df], axis=0) +test_pos = pd.read_csv("./output/chat/test_positivity_chat_level.csv") +test_chat_df = pd.concat([test_chat_df, test_info_exchange_zscore_df, test_pos], axis=0) test_conv_df = pd.read_csv("./output/conv/test_conv_level_conv.csv") test_chat_complex_df = pd.read_csv( "./output/chat/test_chat_level_chat_complex.csv")