diff --git a/program/cohort.ipynb b/program/cohort.ipynb index 9f69a20..9dcc701 100644 --- a/program/cohort.ipynb +++ b/program/cohort.ipynb @@ -42,21 +42,10 @@ }, { "cell_type": "code", - "execution_count": 236, + "execution_count": 2, "id": "4b2265b0", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n", - "The dotenv extension is already loaded. To reload it, use:\n", - " %reload_ext dotenv\n" - ] - } - ], + "outputs": [], "source": [ "#| hide\n", "\n", @@ -100,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": 237, + "execution_count": 3, "id": "32c4d764", "metadata": {}, "outputs": [], @@ -118,7 +107,7 @@ }, { "cell_type": "code", - "execution_count": 238, + "execution_count": 4, "id": "3164a3af", "metadata": {}, "outputs": [], @@ -141,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 239, + "execution_count": 5, "id": "7bc40d28", "metadata": {}, "outputs": [], @@ -160,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 240, + "execution_count": 6, "id": "3b3f17e5", "metadata": {}, "outputs": [], @@ -200,7 +189,7 @@ }, { "cell_type": "code", - "execution_count": 241, + "execution_count": 7, "id": "942a01b5", "metadata": {}, "outputs": [], @@ -241,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 242, + "execution_count": 8, "id": "f1cd2f0e-446d-48a9-a008-b4f1cc593bfc", "metadata": { "tags": [] @@ -348,7 +337,7 @@ "4 3450.0 FEMALE " ] }, - "execution_count": 242, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -385,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": 243, + "execution_count": 9, "id": "f2107c25-e730-4e22-a1b8-5bda53e61124", "metadata": { "tags": [] @@ -564,7 +553,7 @@ "max 6300.000000 NaN " ] }, - "execution_count": 243, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -583,7 +572,7 @@ }, { "cell_type": "code", - "execution_count": 244, + "execution_count": 10, "id": "1242122a-726e-4c37-a718-dd8e873d1612", "metadata": { "tags": [] @@ -641,7 +630,7 @@ }, { "cell_type": "code", - "execution_count": 245, + "execution_count": 11, "id": "cf1cf582-8831-4f83-bb17-2175afb193e8", "metadata": { "tags": [] @@ -656,7 +645,7 @@ "Name: count, dtype: int64" ] }, - "execution_count": 245, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -676,7 +665,7 @@ }, { "cell_type": "code", - "execution_count": 246, + "execution_count": 12, "id": "cc42cb08-275c-4b05-9d2b-77052da2f336", "metadata": { "tags": [] @@ -695,7 +684,7 @@ "dtype: int64" ] }, - "execution_count": 246, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -714,7 +703,7 @@ }, { "cell_type": "code", - "execution_count": 247, + "execution_count": 13, "id": "3c57d55d-afd6-467a-a7a8-ff04132770ed", "metadata": { "tags": [] @@ -733,7 +722,7 @@ "dtype: int64" ] }, - "execution_count": 247, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -756,7 +745,7 @@ }, { "cell_type": "code", - "execution_count": 248, + "execution_count": 14, "id": "2852c740", "metadata": {}, "outputs": [ @@ -802,7 +791,7 @@ }, { "cell_type": "code", - "execution_count": 249, + "execution_count": 15, "id": "707cc972", "metadata": {}, "outputs": [ @@ -850,7 +839,7 @@ }, { "cell_type": "code", - "execution_count": 250, + "execution_count": 16, "id": "3daf3ba1-d218-4ad4-b862-af679b91273f", "metadata": { "tags": [] @@ -930,7 +919,7 @@ "body_mass_g 640316.716388 " ] }, - "execution_count": 250, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -955,7 +944,7 @@ }, { "cell_type": "code", - "execution_count": 251, + "execution_count": 17, "id": "1d793e09-2cb9-47ff-a0e6-199a0f4fc1b3", "metadata": { "tags": [] @@ -1035,7 +1024,7 @@ "body_mass_g 1.000000 " ] }, - "execution_count": 251, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1060,7 +1049,7 @@ }, { "cell_type": "code", - "execution_count": 252, + "execution_count": 18, "id": "1258c99d", "metadata": {}, "outputs": [ @@ -1100,7 +1089,7 @@ }, { "cell_type": "code", - "execution_count": 253, + "execution_count": 19, "id": "45b0a87f-028d-477f-9b65-199728c0b7ee", "metadata": { "tags": [] @@ -1154,7 +1143,7 @@ }, { "cell_type": "code", - "execution_count": 254, + "execution_count": 20, "id": "fb6ba7c0-1bd6-4fe5-8b7f-f6cbdfd3846c", "metadata": { "tags": [] @@ -1177,21 +1166,16 @@ "#| code-line-numbers: true\n", "\n", "import os\n", - "import sys\n", - "import argparse\n", - "import json\n", "import tarfile\n", "import tempfile\n", - "import time\n", "import joblib\n", "import numpy as np\n", "import pandas as pd\n", "\n", - "from io import StringIO\n", "from pathlib import Path\n", "from sklearn.compose import ColumnTransformer, make_column_selector\n", "from sklearn.impute import SimpleImputer\n", - "from sklearn.pipeline import Pipeline, make_pipeline\n", + "from sklearn.pipeline import make_pipeline\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder\n", "\n", @@ -1350,7 +1334,7 @@ }, { "cell_type": "code", - "execution_count": 255, + "execution_count": 21, "id": "d1f122a4-acff-4687-91b9-bfef13567d88", "metadata": { "tags": [] @@ -1360,8 +1344,15 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\n", - "\u001b[32m\u001b[32m\u001b[1m8 passed\u001b[0m\u001b[32m in 0.15s\u001b[0m\u001b[0m\n" + "\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\u001b[32m.\u001b[0m\n", + "\u001b[32m\u001b[32m\u001b[1m8 passed\u001b[0m\u001b[32m in 0.17s\u001b[0m\u001b[0m\n" ] } ], @@ -1487,7 +1478,7 @@ }, { "cell_type": "code", - "execution_count": 256, + "execution_count": 344, "id": "d88e9ccf", "metadata": {}, "outputs": [], @@ -1507,7 +1498,7 @@ }, { "cell_type": "code", - "execution_count": 257, + "execution_count": 345, "id": "331fe373", "metadata": {}, "outputs": [], @@ -1534,7 +1525,7 @@ }, { "cell_type": "code", - "execution_count": 258, + "execution_count": 346, "id": "3aa4471a", "metadata": {}, "outputs": [ @@ -1581,7 +1572,7 @@ }, { "cell_type": "code", - "execution_count": 259, + "execution_count": 347, "id": "cdbd9303", "metadata": { "tags": [] @@ -1662,7 +1653,7 @@ }, { "cell_type": "code", - "execution_count": 260, + "execution_count": 348, "id": "e140642a", "metadata": { "tags": [] @@ -1672,16 +1663,16 @@ "data": { "text/plain": [ "{'PipelineArn': 'arn:aws:sagemaker:us-east-1:325223348818:pipeline/session1-pipeline',\n", - " 'ResponseMetadata': {'RequestId': '26abb4d9-9dc7-44f7-be35-cab1f5a41e02',\n", + " 'ResponseMetadata': {'RequestId': '1a583251-fced-460c-ad0f-ddda5589cbed',\n", " 'HTTPStatusCode': 200,\n", - " 'HTTPHeaders': {'x-amzn-requestid': '26abb4d9-9dc7-44f7-be35-cab1f5a41e02',\n", + " 'HTTPHeaders': {'x-amzn-requestid': '1a583251-fced-460c-ad0f-ddda5589cbed',\n", " 'content-type': 'application/x-amz-json-1.1',\n", " 'content-length': '85',\n", - " 'date': 'Tue, 31 Oct 2023 20:24:41 GMT'},\n", + " 'date': 'Wed, 01 Nov 2023 17:31:54 GMT'},\n", " 'RetryAttempts': 0}}" ] }, - "execution_count": 260, + "execution_count": 348, "metadata": {}, "output_type": "execute_result" } @@ -1730,10 +1721,21 @@ }, { "cell_type": "code", - "execution_count": 261, + "execution_count": 349, "id": "59d1e634", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "_PipelineExecution(arn='arn:aws:sagemaker:us-east-1:325223348818:pipeline/session1-pipeline/execution/jmk50mirn3i3', sagemaker_session=)" + ] + }, + "execution_count": 349, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "%%script false --no-raise-error\n", "#| eval: false\n", diff --git a/program/setup.qmd b/program/setup.qmd index bba8279..a7f9c0f 100644 --- a/program/setup.qmd +++ b/program/setup.qmd @@ -39,11 +39,11 @@ $ docker ps ## Configuring AWS -If you don't have one yet, create a new AWS account. You want to use a user with administrative privileges. +If you don't have one yet, create a new AWS account. A community member noticed that indicating the account is for personal use and his interest is in Machine Learning gave him immediate access to the hardware we need for the program. -We'll need access to `ml.m5.xlarge` instances during the program. By default, the quota for a new account is zero, so you need to request a quota increase. -You can do this in your AWS account, under Service Quotas > AWS Services > Amazon SageMaker. Find `ml.m5.xlarge` and request a quota increase for processing -jobs, training jobs, transform jobs, and endpoint usage. Ask for a minimum of 3 instances. +We'll need access to `ml.m5.xlarge` instances. By default, the quota for a new account is zero, but the tip above might fix this problem. If it doesn't, you'll need to request a quota increase. + +You can do this in your AWS account under Service Quotas > AWS Services > Amazon SageMaker. Find `ml.m5.xlarge` and request a quota increase for processing jobs, training jobs, transform jobs, and endpoint usage. Ask for a minimum of 3 instances. You'll need access to AWS from your local environment. [Install the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) and [configure it](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) with your `aws_access_key_id` and `aws_secret_access_key.`