Skip to content

Commit

Permalink
notebook edits
Browse files Browse the repository at this point in the history
  • Loading branch information
jlooper committed Jun 29, 2021
1 parent b57fdec commit 6332008
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 45 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,23 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": 3
"version": "3.7.0"
},
"orig_nbformat": 4
"orig_nbformat": 4,
"kernelspec": {
"name": "python3",
"display_name": "Python 3.7.0 64-bit ('3.7')"
},
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -30,7 +37,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -54,7 +61,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -65,7 +72,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -75,7 +82,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -85,7 +92,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -98,7 +105,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -118,23 +125,32 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# No longer need any of these columns\n",
"df.drop([\"Tags\", \"Review_Date\", \"Review_Total_Negative_Word_Counts\", \"Review_Total_Positive_Word_Counts\", \"days_since_review\", \"Total_Number_of_Reviews_Reviewer_Has_Given\"], axis = 1, inplace=True)\n"
"df.drop([\"Review_Date\", \"Review_Total_Negative_Word_Counts\", \"Review_Total_Positive_Word_Counts\", \"days_since_review\", \"Total_Number_of_Reviews_Reviewer_Has_Given\"], axis = 1, inplace=True)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Saving results to Hotel_Reviews_Filtered.csv\n",
"Filtering took 23.74 seconds\n"
]
}
],
"source": [
"# Saving new data file with calculated columns\n",
"print(\"Saving results to Hotel_Reviews_Filtered.csv\")\n",
"df.to_csv(r'Hotel_Reviews_Filtered.csv', index = False)\n",
"df.to_csv(r'../../data/Hotel_Reviews_Filtered.csv', index = False)\n",
"end = time.time()\n",
"print(\"Filtering took \" + str(round(end - start, 2)) + \" seconds\")\n"
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,26 +10,35 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": 3
"version": "3.7.0"
},
"orig_nbformat": 4
"orig_nbformat": 4,
"kernelspec": {
"name": "python3",
"display_name": "Python 3.7.0 64-bit ('3.7')"
},
"interpreter": {
"hash": "70b38d7a306a849643e446cd70466270a13445e5987dfa1344ef2b127438fa4d"
}
},
"nbformat": 4,
"nbformat_minor": 2,
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Load the hotel reviews from CSV (you can )\n",
"import pandas as pd \n",
"\n",
"df = pd.read_csv('../../data/Hotel_Reviews_Filtered.csv')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -42,7 +51,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -53,7 +62,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -68,7 +77,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -78,9 +87,27 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 9,
"metadata": {},
"outputs": [],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"The shape of the tags with no filtering: (2514684, 2)\n",
" index count\n",
"0 Leisure trip 338423\n",
"1 Couple 205305\n",
"2 Solo traveler 89779\n",
"3 Business trip 68176\n",
"4 Group 51593\n",
"5 Family with young children 49318\n",
"6 Family with older children 21509\n",
"7 Travelers with friends 1610\n",
"8 With a pet 1078\n"
]
}
],
"source": [
"# Get the value counts\n",
"tag_vc = df_tags.value.value_counts()\n",
Expand Down
Loading

0 comments on commit 6332008

Please sign in to comment.