Skip to content

Commit

Permalink
Update
Browse files Browse the repository at this point in the history
  • Loading branch information
trajanov committed Oct 8, 2024
1 parent d51af7b commit c9c18e0
Show file tree
Hide file tree
Showing 11 changed files with 814 additions and 508 deletions.
29 changes: 15 additions & 14 deletions Notebooks/Spark-Example-19-Logistic-Regression.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -84,7 +84,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -124,7 +124,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -161,7 +161,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand All @@ -172,7 +172,7 @@
" (0, array([ -0.86104732, -10.42137477, 1. ]))]"
]
},
"execution_count": 4,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -221,12 +221,13 @@
"\n",
"$$\n",
"\\frac{\\partial LLH}{\\partial \\beta_j} =\\sum_{i=1}^n - y_i x_{i,j} + x_{i,j} (\\frac{ e^{\\theta_i} }{1 + e^{\\theta_i}})\n",
"= \\sum_{i=1}^n -x_{i,j}( y_i + \\frac{ e^{\\theta_i} }{1 + e^{\\theta_i}})\n",
"$$"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 8,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -283,7 +284,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 9,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -335,7 +336,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 10,
"metadata": {},
"outputs": [
{
Expand All @@ -346,7 +347,7 @@
" (1, array([ 0.10996697, -3.83318403, 1. ]))]"
]
},
"execution_count": 7,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -360,7 +361,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -409,7 +410,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 12,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -466,7 +467,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 17,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -523,7 +524,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 18,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -575,7 +576,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 19,
"metadata": {},
"outputs": [
{
Expand Down
2 changes: 1 addition & 1 deletion Notebooks/Spark-Example-19b-SVM.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "base",
"display_name": "spark",
"language": "python",
"name": "python3"
},
Expand Down
7 changes: 1 addition & 6 deletions Notebooks/Spark-Example-20b-Imbalanced-Classes.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.10.4 ('spark')",
"display_name": "spark",
"language": "python",
"name": "python3"
},
Expand All @@ -765,11 +765,6 @@
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
},
"vscode": {
"interpreter": {
"hash": "9db6cbf0fd79f8e79653fe7b0c50b956ca6e525ee712295da3c66f75e4fe96ce"
}
}
},
"nbformat": 4,
Expand Down
18 changes: 9 additions & 9 deletions Notebooks/Spark-Example-22-Mllib-Clustering.ipynb

Large diffs are not rendered by default.

82 changes: 55 additions & 27 deletions Notebooks/Spark-Example-22a-Mllib-Recomender.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,13 @@
"(training_data, test_data) = ratings_data.randomSplit([0.8, 0.2], seed=1234)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Create an ALS model"
]
},
{
"cell_type": "code",
"execution_count": 3,
Expand Down Expand Up @@ -135,6 +142,13 @@
" \"recommendations.rating as rating\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Evaluate the model by computing the RMSE on the test data"
]
},
{
"cell_type": "code",
"execution_count": 4,
Expand All @@ -144,7 +158,7 @@
"name": "stdout",
"output_type": "stream",
"text": [
"Root-mean-square error = 1.139236762745733\n"
"Root-mean-square error = 1.1085100193424526\n"
]
}
],
Expand All @@ -158,6 +172,13 @@
"print(\"Root-mean-square error = \" + str(rmse))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Show the recommendations for a specific user"
]
},
{
"cell_type": "code",
"execution_count": 5,
Expand Down Expand Up @@ -203,20 +224,20 @@
"+----------------------------------------------------+-----------------------------------------------+------+\n",
"\n",
"Top 10 recommendations for user with id 2\n",
"+------------------------------------------+--------------------------+---------+\n",
"|title |genres |rating |\n",
"+------------------------------------------+--------------------------+---------+\n",
"|I Now Pronounce You Chuck and Larry (2007)|Comedy|Romance |7.5183773|\n",
"|Empire Records (1995) |Comedy|Drama |7.390065 |\n",
"|Charlie Brown Christmas, A (1965) |Animation|Children|Comedy |7.3883414|\n",
"|Focus (2015) |Comedy|Crime|Drama|Romance|7.3361917|\n",
"|Just Go with It (2011) |Comedy|Romance |7.277938 |\n",
"|Hangover Part II, The (2011) |Comedy |7.1638694|\n",
"|Deadpool 2 (2018) |Action|Comedy|Sci-Fi |7.1230917|\n",
"|A Quiet Place (2018) |Drama|Horror|Thriller |7.0971427|\n",
"|John Tucker Must Die (2006) |Comedy|Romance |6.98362 |\n",
"|Proposal, The (2009) |Comedy|Romance |6.9803677|\n",
"+------------------------------------------+--------------------------+---------+\n",
"+------------------------------------------------------------------+-------------------------------------+---------+\n",
"|title |genres |rating |\n",
"+------------------------------------------------------------------+-------------------------------------+---------+\n",
"|Flash Gordon (1980) |Action|Adventure|Sci-Fi |9.5280485|\n",
"|Proposal, The (2009) |Comedy|Romance |7.8743415|\n",
"|Persuasion (1995) |Drama|Romance |7.822497 |\n",
"|Sullivan's Travels (1941) |Adventure|Comedy|Romance |7.6006355|\n",
"|Red Dawn (1984) |Action|Drama|War |7.580669 |\n",
"|Where the Wild Things Are (2009) |Adventure|Children|Drama|Fantasy|IMAX|7.34213 |\n",
"|Mist, The (2007) |Horror|Sci-Fi |7.2632823|\n",
"|Adventures of Buckaroo Banzai Across the 8th Dimension, The (1984)|Adventure|Comedy|Sci-Fi |7.175868 |\n",
"|Family Guy Presents: It's a Trap (2010) |Animation|Comedy|Sci-Fi |7.139153 |\n",
"|Slackers (2002) |Comedy |7.116838 |\n",
"+------------------------------------------------------------------+-------------------------------------+---------+\n",
"\n"
]
}
Expand All @@ -238,12 +259,19 @@
" .select(\"title\", \"genres\", \"rating\")\n",
"print(\"Top 10 recommendations for user with id \" + str(user_id))\n",
"# Show the recommendations for a specific user\n",
"user_rec.show(truncate=False)\n"
"user_rec.show(truncate=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Generate top 10 user recommendations for each movie"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand All @@ -254,16 +282,16 @@
"+-------+------+---------+\n",
"|movieId|userId|rating |\n",
"+-------+------+---------+\n",
"|2 |485 |6.0814233|\n",
"|2 |413 |6.0620027|\n",
"|2 |46 |5.8265424|\n",
"|2 |243 |5.438458 |\n",
"|2 |399 |5.434802 |\n",
"|2 |43 |5.434675 |\n",
"|2 |486 |5.3800526|\n",
"|2 |468 |5.3796015|\n",
"|2 |388 |5.2158947|\n",
"|2 |240 |5.1414065|\n",
"|2 |258 |6.898732 |\n",
"|2 |543 |5.9302106|\n",
"|2 |407 |5.830795 |\n",
"|2 |48 |5.375635 |\n",
"|2 |35 |5.1472993|\n",
"|2 |162 |4.9345474|\n",
"|2 |553 |4.8886347|\n",
"|2 |53 |4.881683 |\n",
"|2 |478 |4.8594093|\n",
"|2 |584 |4.8578053|\n",
"+-------+------+---------+\n",
"\n"
]
Expand Down
2 changes: 1 addition & 1 deletion Notebooks/Spark-Example-23-Mllib-Sentiment Model.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1129,7 +1129,7 @@
"idf = IDF(inputCol=countVectorizer.getOutputCol(), outputCol=\"featuresIDF\")\n",
"\n",
"# Select the top 200 features based on their chi-squared test value\n",
"selector = ChiSqSelector(numTopFeatures=200, featuresCol=idf.getOutputCol(), outputCol=\"features\", labelCol=\"label\")\n",
"selector = (numTopFeatures=200, featuresCol=idf.getOutputCol(), outputCol=\"features\", labelCol=\"label\")\n",
"# Crate a preprocessing pipeline with 5 stages\n",
"pipeline_p = Pipeline(stages=[tokenizer,remover, countVectorizer, idf,selector])\n",
"# Learn the data preprocessing model\n",
Expand Down
Loading

0 comments on commit c9c18e0

Please sign in to comment.