From f95045e2534b9705e0acb7806ce13bc877e3d62d Mon Sep 17 00:00:00 2001 From: lgaalves Date: Tue, 2 Jul 2019 10:03:10 -0300 Subject: [PATCH] solutions --- .../06 - Mini-Project.ipynb | 128 ++++++++++++++++-- 1 file changed, 118 insertions(+), 10 deletions(-) diff --git a/day-2-introduction-to-python-part-II-and-web-scraping/06 - Mini-Project.ipynb b/day-2-introduction-to-python-part-II-and-web-scraping/06 - Mini-Project.ipynb index b2ea8c6..e909eaa 100755 --- a/day-2-introduction-to-python-part-II-and-web-scraping/06 - Mini-Project.ipynb +++ b/day-2-introduction-to-python-part-II-and-web-scraping/06 - Mini-Project.ipynb @@ -504,11 +504,11 @@ }, { "cell_type": "code", - "execution_count": 154, + "execution_count": 186, "metadata": { "ExecuteTime": { - "end_time": "2019-07-02T12:27:15.851723Z", - "start_time": "2019-07-02T12:27:15.626855Z" + "end_time": "2019-07-02T12:49:29.742836Z", + "start_time": "2019-07-02T12:49:29.532825Z" } }, "outputs": [ @@ -697,13 +697,65 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 209, "metadata": { - "collapsed": true + "ExecuteTime": { + "end_time": "2019-07-02T12:55:14.180254Z", + "start_time": "2019-07-02T12:55:14.175575Z" + } }, "outputs": [], "source": [ - "#Test to see if the groups all have the same population mean\n" + "#Test to see if the groups all have the same population mean\n", + "category_employed=[]\n", + "category_unemployed=[]\n", + "for major_category in all_majors:\n", + " employed=[]\n", + " unemployed=[]\n", + " for major in major_category:\n", + " employed.append(major['Employed'])\n", + " unemployed.append(major['Unemployed']) \n", + " category_employed.append(employed)\n", + " category_unemployed.append(unemployed)" + ] + }, + { + "cell_type": "code", + "execution_count": 210, + "metadata": { + "ExecuteTime": { + "end_time": "2019-07-02T12:55:14.756162Z", + "start_time": "2019-07-02T12:55:14.752269Z" + } + }, + "outputs": [], + "source": [ + "from scipy import stats" + ] + }, + { + "cell_type": "code", + "execution_count": 212, + "metadata": { + "ExecuteTime": { + "end_time": "2019-07-02T12:55:18.622152Z", + "start_time": "2019-07-02T12:55:18.613466Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "F_onewayResult(statistic=1.9640512058789068, pvalue=0.02099887134728745)" + ] + }, + "execution_count": 212, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stats.f_oneway(*category_employed)" ] }, { @@ -720,15 +772,71 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 222, "metadata": { - "collapsed": true + "ExecuteTime": { + "end_time": "2019-07-02T13:01:35.386804Z", + "start_time": "2019-07-02T13:01:35.360534Z" + } }, - "outputs": [], + "outputs": [ + { + "ename": "TypeError", + "evalue": "random_sample() takes at most 1 positional argument (2 given)", + "output_type": "error", + "traceback": [ + "\u001b[0;31m----------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mlenj\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcategory_employed\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mj\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mmin_len\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mleni\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mlenj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 8\u001b[0;31m \u001b[0msample1\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msample\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcategory_employed\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmin_len\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 9\u001b[0m \u001b[0msample2\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msample\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcategory_employed\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmin_len\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mleni\u001b[0m\u001b[0;34m==\u001b[0m\u001b[0mlenj\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32mmtrand.pyx\u001b[0m in \u001b[0;36mmtrand.RandomState.random_sample\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mTypeError\u001b[0m: random_sample() takes at most 1 positional argument (2 given)" + ] + } + ], "source": [ "#Test the pairwise associations\n", - "\n" + "for i in range(0, len(category_employed)):\n", + " for j in range(0, len(category_employed)):\n", + " if i >j:\n", + " leni=len(category_employed[i])\n", + " lenj=len(category_employed[j])\n", + " min_len=min(leni,lenj)\n", + " sample1=np.random.sample(category_employed[i],min_len)\n", + " sample2=np.random.sample(category_employed[i],min_len)\n", + " if leni==lenj: \n", + " print(stats.ttest_rel(sample1, sample2))" ] + }, + { + "cell_type": "code", + "execution_count": 221, + "metadata": { + "ExecuteTime": { + "end_time": "2019-07-02T13:01:03.762814Z", + "start_time": "2019-07-02T13:01:03.750650Z" + } + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'min_len' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m----------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrandom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msample\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcategory_employed\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mmin_len\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'min_len' is not defined" + ] + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {