Created using Colaboratory

gumdropsteve · gumdropsteve · commit 30f6e15ce3d2 · 2021-01-21T02:29:26.000-08:00
diff --git a/day_15/01_intro_dask.ipynb b/day_15/01_intro_dask.ipynb
@@ -5,14 +5,25 @@
     "colab": {
       "name": "01_intro_dask.ipynb",
       "provenance": [],
-      "authorship_tag": "ABX9TyOkip/jPKANIdmhdkT7IEkI"
+      "authorship_tag": "ABX9TyOkip/jPKANIdmhdkT7IEkI",
+      "include_colab_link": true
     },
     "kernelspec": {
       "name": "python3",
       "display_name": "Python 3"
     }
   },
   "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/gumdropsteve/intro_to_python/blob/main/day_15/01_intro_dask.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -46,7 +57,7 @@
         "\r\n",
         "df"
       ],
-      "execution_count": 1,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -110,7 +121,7 @@
       "source": [
         "df.to_csv('small.csv', index=False)"
       ],
-      "execution_count": 2,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -125,7 +136,7 @@
       "source": [
         "!python -m pip install \"dask[dataframe]\""
       ],
-      "execution_count": 4,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -166,7 +177,7 @@
         "\r\n",
         "dd.read_csv('small.csv')"
       ],
-      "execution_count": 5,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -254,7 +265,7 @@
       "source": [
         "dd.read_csv('small.csv').compute()"
       ],
-      "execution_count": 6,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -322,7 +333,7 @@
       "source": [
         "type(dd.read_csv('small.csv').compute())"
       ],
-      "execution_count": 7,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -353,7 +364,7 @@
         "\r\n",
         "df.describe()"
       ],
-      "execution_count": 12,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -432,7 +443,7 @@
       "source": [
         "df.describe().compute()"
       ],
-      "execution_count": 13,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -551,7 +562,7 @@
         "graph = df.__dask_graph__()\r\n",
         "graph.layers"
       ],
-      "execution_count": 18,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -614,7 +625,7 @@
         "# dataframe as is now\r\n",
         "df"
       ],
-      "execution_count": 19,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -694,7 +705,7 @@
         "# just reading\r\n",
         "dd.read_csv('small.csv')"
       ],
-      "execution_count": 20,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -782,7 +793,7 @@
       "source": [
         "df.compute()"
       ],
-      "execution_count": 21,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -862,7 +873,7 @@
         "\r\n",
         "!wget \"https://github.com/gumdropsteve/datasets/raw/master/airlines.parquet\""
       ],
-      "execution_count": 27,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -902,7 +913,7 @@
         "%%time\r\n",
         "pd.read_parquet('airlines.parquet')"
       ],
-      "execution_count": 28,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1181,7 +1192,7 @@
         "%%time\r\n",
         "dd.read_parquet('airlines.parquet')"
       ],
-      "execution_count": 29,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1317,7 +1328,7 @@
         "%%time\r\n",
         "dd.read_parquet('airlines.parquet').compute()"
       ],
-      "execution_count": 33,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1599,7 +1610,7 @@
         "\r\n",
         "df.compute()"
       ],
-      "execution_count": 45,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1628,7 +1639,7 @@
         "\r\n",
         "df"
       ],
-      "execution_count": 46,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "stream",
@@ -1681,7 +1692,7 @@
         "total = sum(output)\r\n",
         "total"
       ],
-      "execution_count": 7,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1721,7 +1732,7 @@
         "\r\n",
         "total.visualize()"
       ],
-      "execution_count": 10,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1750,7 +1761,7 @@
       "source": [
         "total.compute()"
       ],
-      "execution_count": 11,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1789,7 +1800,7 @@
         "\r\n",
         "compute(*[total, total])"
       ],
-      "execution_count": 12,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1826,7 +1837,7 @@
         "\r\n",
         "df = dd.read_csv('iris.csv')"
       ],
-      "execution_count": 17,
+      "execution_count": null,
       "outputs": []
     },
     {
@@ -1841,7 +1852,7 @@
       "source": [
         "compute([df])"
       ],
-      "execution_count": 18,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1882,7 +1893,7 @@
       "source": [
         "compute(*[total, total, df])"
       ],
-      "execution_count": 19,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -1925,7 +1936,7 @@
       "source": [
         "compute(*[total, df, total])"
       ],
-      "execution_count": 20,
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",