From 148dda05ffd4bd618f1d41b1895879c971f8bc80 Mon Sep 17 00:00:00 2001
From: agshruti12 <agshruti2901@gmail.com>
Date: Mon, 28 Oct 2024 17:39:28 -0400
Subject: [PATCH] positivity zscore tests

---
 .../cleaned_data/positivity_zscore_chats.csv  |  11 +
 tests/data/cleaned_data/test_chat_level.csv   |  12 +-
 tests/ipython_notebooks/helper.ipynb          | 236 ++++++++++++++++++
 tests/run_tests.py                            |  19 ++
 tests/test_feature_metrics.py                 |   3 +-
 5 files changed, 279 insertions(+), 2 deletions(-)
 create mode 100644 tests/data/cleaned_data/positivity_zscore_chats.csv
diff --git a/tests/data/cleaned_data/positivity_zscore_chats.csv b/tests/data/cleaned_data/positivity_zscore_chats.csv
new file mode 100644
index 00000000..211a7692
--- /dev/null
+++ b/tests/data/cleaned_data/positivity_zscore_chats.csv
@@ -0,0 +1,11 @@
+conversation_num,speaker_nickname,message,expected_column,expected_value
+I,1,I am enjoying the weather today!,positivity_zscore_chats,1.4346980995217895
+I,2,Likewise it's beautiful.,positivity_zscore_chats,1.3862275774841195
+I,3,I'm not a fan of the rain.,positivity_zscore_chats,-1.1105762978767422
+I,1,I think it's enchanting.,positivity_zscore_chats,1.3393376638185834
+I,3,Agree to disagree.,positivity_zscore_chats,-1.0295558496422406
+J,1,This conversation is more neutral.,positivity_zscore_chats,-0.4050742076501151
+J,2,How are you doing?,positivity_zscore_chats,-0.613670825442067
+J,3,I am mainly studying today.,positivity_zscore_chats,-0.7371504869897708
+J,1,Anything fun planned?,positivity_zscore_chats,0.5057873469055534
+J,3,Mostly a trip to the library.,positivity_zscore_chats,-0.77002302012911
diff --git a/tests/data/cleaned_data/test_chat_level.csv b/tests/data/cleaned_data/test_chat_level.csv
index 94fb7f45..4ffe486b 100644
--- a/tests/data/cleaned_data/test_chat_level.csv
+++ b/tests/data/cleaned_data/test_chat_level.csv
@@ -733,4 +733,14 @@ H,1,fantastic difficulty,dale_chall_classification,difficult
 H,3,trying quote random,dale_chall_classification,easy
 H,3,erase eraser errand every even dig dim dime dine computer,dale_chall_classification,medium
 H,3,discover direction different,dale_chall_classification,easy
-H,4,even this is magnificent! even this is magnificent!,dale_chall_classification,difficult
\ No newline at end of file
+H,4,even this is magnificent! even this is magnificent!,dale_chall_classification,difficult
+I,1,I am enjoying the weather today!,positivity_zscore_conversation,0.855868
+I,2,Likewise it's beautiful.,positivity_zscore_conversation,0.8156
+I,3,I'm not a fan of the rain.,positivity_zscore_conversation,-1.2577
+I,1,I think it's enchanting.,positivity_zscore_conversation,0.7766
+I,3,Agree to disagree.,positivity_zscore_conversation,-1.19044
+J,1,This conversation is more neutral.,positivity_zscore_conversation,-0.002217
+J,2,How are you doing?,positivity_zscore_conversation,-0.443621
+J,3,I am mainly studying today.,positivity_zscore_conversation,-0.704912
+J,1,Anything fun planned?,positivity_zscore_conversation,1.925224
+J,3,Mostly a trip to the library.,positivity_zscore_conversation,-0.774473
\ No newline at end of file
diff --git a/tests/ipython_notebooks/helper.ipynb b/tests/ipython_notebooks/helper.ipynb
index b8667236..8f042a9b 100644
--- a/tests/ipython_notebooks/helper.ipynb
+++ b/tests/ipython_notebooks/helper.ipynb
@@ -2771,6 +2771,242 @@
     "print(first_person_regex)\n",
     "re.findall(first_person_regex, \"hiii\")\n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>message</th>\n",
+       "      <th>expected_value</th>\n",
+       "      <th>positive_bert</th>\n",
+       "      <th>positivity_zscore_chats</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>i am enjoying the weather today</td>\n",
+       "      <td>0.855868</td>\n",
+       "      <td>0.986509</td>\n",
+       "      <td>1.434698</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>likewise its beautiful</td>\n",
+       "      <td>0.815600</td>\n",
+       "      <td>0.968142</td>\n",
+       "      <td>1.386228</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>im not a fan of the rain</td>\n",
+       "      <td>-1.257700</td>\n",
+       "      <td>0.021985</td>\n",
+       "      <td>-1.110576</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>i think its enchanting</td>\n",
+       "      <td>0.776600</td>\n",
+       "      <td>0.950373</td>\n",
+       "      <td>1.339338</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>agree to disagree</td>\n",
+       "      <td>-1.190440</td>\n",
+       "      <td>0.052687</td>\n",
+       "      <td>-1.029556</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>this conversation is more neutral</td>\n",
+       "      <td>-0.002217</td>\n",
+       "      <td>0.289333</td>\n",
+       "      <td>-0.405074</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>6</th>\n",
+       "      <td>how are you doing</td>\n",
+       "      <td>-0.443621</td>\n",
+       "      <td>0.210286</td>\n",
+       "      <td>-0.613671</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>7</th>\n",
+       "      <td>i am mainly studying today</td>\n",
+       "      <td>-0.704912</td>\n",
+       "      <td>0.163493</td>\n",
+       "      <td>-0.737150</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>anything fun planned</td>\n",
+       "      <td>1.925224</td>\n",
+       "      <td>0.634501</td>\n",
+       "      <td>0.505787</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>mostly a trip to the library</td>\n",
+       "      <td>-0.774473</td>\n",
+       "      <td>0.151037</td>\n",
+       "      <td>-0.770023</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                             message  expected_value  positive_bert  \\\n",
+       "0    i am enjoying the weather today        0.855868       0.986509   \n",
+       "1             likewise its beautiful        0.815600       0.968142   \n",
+       "2           im not a fan of the rain       -1.257700       0.021985   \n",
+       "3             i think its enchanting        0.776600       0.950373   \n",
+       "4                  agree to disagree       -1.190440       0.052687   \n",
+       "5  this conversation is more neutral       -0.002217       0.289333   \n",
+       "6                  how are you doing       -0.443621       0.210286   \n",
+       "7         i am mainly studying today       -0.704912       0.163493   \n",
+       "8               anything fun planned        1.925224       0.634501   \n",
+       "9       mostly a trip to the library       -0.774473       0.151037   \n",
+       "\n",
+       "   positivity_zscore_chats  \n",
+       "0                 1.434698  \n",
+       "1                 1.386228  \n",
+       "2                -1.110576  \n",
+       "3                 1.339338  \n",
+       "4                -1.029556  \n",
+       "5                -0.405074  \n",
+       "6                -0.613671  \n",
+       "7                -0.737150  \n",
+       "8                 0.505787  \n",
+       "9                -0.770023  "
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import scipy.stats as stats\n",
+    "\n",
+    "pd.read_csv(\"./output/chat/test_positivity_chat_level.csv\")[['message', \"expected_value\", \"positive_bert\", 'positivity_zscore_chats']]\n",
+    "# pd.read_csv(\"./output/chat/test_positivity_chat_level.csv\")[[\"expected_value\"]]\n",
+    "\n",
+    "\n",
+    "# A,1,I am enjoying the weather today!,positivity_zscore_chats,0.855868\n",
+    "# A,2,Likewise it's beautiful.,positivity_zscore_chats,0.8156\n",
+    "# A,3,I'm not a fan of the rain.,positivity_zscore_chats,-1.2577\n",
+    "# A,1,I think it's enchanting.,positivity_zscore_chats,0.7766\n",
+    "# A,3,Agree to disagree.,positivity_zscore_chats,-1.19044\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = pd.read_csv(\"./data/cleaned_data/positivity_zscore_chats.csv\")\n",
+    "df['expected_value'] = stats.zscore(pd.read_csv(\"./output/chat/test_positivity_chat_level.csv\")['positivity_zscore_chats'])\n",
+    "df.to_csv(\"./data/cleaned_data/positivity_zscore_chats.csv\", index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>positive_bert</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.986509</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0.968142</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.021985</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0.950373</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.052687</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   positive_bert\n",
+       "0       0.986509\n",
+       "1       0.968142\n",
+       "2       0.021985\n",
+       "3       0.950373\n",
+       "4       0.052687"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.read_csv(\"./output/chat/test_positivity_chat_level.csv\")[[\"positive_bert\"]]\n"
+   ]
   }
  ],
  "metadata": {
diff --git a/tests/run_tests.py b/tests/run_tests.py
index 3b8103a7..c15eaaba 100644
--- a/tests/run_tests.py
+++ b/tests/run_tests.py
@@ -29,9 +29,28 @@
 	test_forward_flow_df = pd.read_csv("data/cleaned_data/fflow.csv", encoding=chat_encoding['encoding'])
 	conv_complex_timestamps_df = pd.read_csv("data/cleaned_data/test_conv_level_complex_timestamps.csv", encoding=chat_encoding['encoding'])
 
+	positivity_zscore = pd.read_csv("data/cleaned_data/positivity_zscore_chats.csv", encoding=chat_encoding['encoding'])
+
 		
 	# TESTING DATASETS -------------------------------
 
+	test_positivity = FeatureBuilder(
+		input_df = positivity_zscore,
+		vector_directory = "./vector_data/",
+		output_file_path_chat_level = "./output/chat/test_positivity_chat_level.csv",
+		output_file_path_user_level = "./output/user/test_positivity_user_level.csv",
+		output_file_path_conv_level = "./output/conv/test_positivity_conv_level.csv",
+		custom_features = [ # these require vect_data, so they now need to be explicitly included in order to calculate them
+			"(BERT) Mimicry",
+			"Moving Mimicry",
+			"Forward Flow",
+			"Discursive Diversity"
+		],
+		turns = False,
+		regenerate_vectors = True
+	)
+	test_positivity.featurize()
+
 	testing_chat = FeatureBuilder(
 		input_df = chat_df,
 		vector_directory = "./vector_data/",
diff --git a/tests/test_feature_metrics.py b/tests/test_feature_metrics.py
index e52e67d4..3289d20f 100644
--- a/tests/test_feature_metrics.py
+++ b/tests/test_feature_metrics.py
@@ -8,7 +8,8 @@
 
 test_chat_df = pd.read_csv("./output/chat/test_chat_level_chat.csv")
 test_info_exchange_zscore_df = pd.read_csv("./output/chat/info_exchange_zscore_chats.csv")
-test_chat_df = pd.concat([test_chat_df, test_info_exchange_zscore_df], axis=0)
+test_pos = pd.read_csv("./output/chat/test_positivity_chat_level.csv")
+test_chat_df = pd.concat([test_chat_df, test_info_exchange_zscore_df, test_pos], axis=0)
 test_conv_df = pd.read_csv("./output/conv/test_conv_level_conv.csv")
 test_chat_complex_df = pd.read_csv(
     "./output/chat/test_chat_level_chat_complex.csv")

	message	expected_value	positive_bert	positivity_zscore_chats
0	i am enjoying the weather today	0.855868	0.986509	1.434698
1	likewise its beautiful	0.815600	0.968142	1.386228
2	im not a fan of the rain	-1.257700	0.021985	-1.110576
3	i think its enchanting	0.776600	0.950373	1.339338
4	agree to disagree	-1.190440	0.052687	-1.029556
5	this conversation is more neutral	-0.002217	0.289333	-0.405074
6	how are you doing	-0.443621	0.210286	-0.613671
7	i am mainly studying today	-0.704912	0.163493	-0.737150
8	anything fun planned	1.925224	0.634501	0.505787
9	mostly a trip to the library	-0.774473	0.151037	-0.770023