|
76 | 76 | "import numpy as np\n",
|
77 | 77 | "import matplotlib.pyplot as plt\n",
|
78 | 78 | "\n",
|
79 |
| - "from pandas import set_option\n", |
80 |
| - "set_option(\"display.max_rows\", 16)\n", |
| 79 | + "pd.set_option(\"display.max_rows\", 16)\n", |
81 | 80 | "\n",
|
82 | 81 | "LARGE_FIGSIZE = (12, 8)"
|
83 | 82 | ]
|
|
91 | 90 | "outputs": [],
|
92 | 91 | "source": [
|
93 | 92 | "# Change this cell to the demo location on YOUR machine\n",
|
94 |
| - "%cd ~/Projects/SciPy2015_pandas_tutorial/demos/climate_timeseries/\n", |
| 93 | + "%cd ~/Projects/pandas_tutorial/climate_timeseries/\n", |
95 | 94 | "%ls"
|
96 | 95 | ]
|
97 | 96 | },
|
|
1714 | 1713 | "source": [
|
1715 | 1714 | "# Frequencies can be specified as strings: \"us\", \"ms\", \"S\", \"T\", \"H\", \"D\", \"B\", \"W\", \"M\", \"A\", \"3min\", \"2h20\", ...\n",
|
1716 | 1715 | "# More aliases at http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases\n",
|
1717 |
| - "full_globe_temp.resample(\"M\")" |
| 1716 | + "full_globe_temp.resample(\"M\").mean()" |
1718 | 1717 | ]
|
1719 | 1718 | },
|
1720 | 1719 | {
|
|
1725 | 1724 | },
|
1726 | 1725 | "outputs": [],
|
1727 | 1726 | "source": [
|
1728 |
| - "full_globe_temp.resample(\"10A\", how=\"mean\")" |
| 1727 | + "full_globe_temp.resample(\"10A\").mean()" |
1729 | 1728 | ]
|
1730 | 1729 | },
|
1731 | 1730 | {
|
|
1920 | 1919 | },
|
1921 | 1920 | "outputs": [],
|
1922 | 1921 | "source": [
|
1923 |
| - "local_sea_level_stations.sort(\"Date\")" |
| 1922 | + "local_sea_level_stations.sort_values(by=\"Date\")" |
1924 | 1923 | ]
|
1925 | 1924 | },
|
1926 | 1925 | {
|
|
1938 | 1937 | },
|
1939 | 1938 | "outputs": [],
|
1940 | 1939 | "source": [
|
1941 |
| - "local_sea_level_stations.sort([\"Date\", \"Country\"], ascending=False)" |
| 1940 | + "local_sea_level_stations.sort_values(by=[\"Date\", \"Country\"], ascending=False)" |
1942 | 1941 | ]
|
1943 | 1942 | },
|
1944 | 1943 | {
|
|
2186 | 2185 | "outputs": [],
|
2187 | 2186 | "source": [
|
2188 | 2187 | "full_globe_temp.plot()\n",
|
2189 |
| - "pd.rolling_mean(full_globe_temp, 10).plot(figsize=LARGE_FIGSIZE)" |
| 2188 | + "rolled_series = full_globe_temp.rolling(window=10, center=False)\n", |
| 2189 | + "print rolled_series\n", |
| 2190 | + "rolled_series.mean().plot(figsize=LARGE_FIGSIZE)" |
2190 | 2191 | ]
|
2191 | 2192 | },
|
2192 | 2193 | {
|
|
2648 | 2649 | },
|
2649 | 2650 | "outputs": [],
|
2650 | 2651 | "source": [
|
2651 |
| - "european_stations.sort(\"Country\")" |
| 2652 | + "european_stations.sort_values(by=\"Country\")" |
2652 | 2653 | ]
|
2653 | 2654 | },
|
2654 | 2655 | {
|
|
2817 | 2818 | "cell_type": "markdown",
|
2818 | 2819 | "metadata": {},
|
2819 | 2820 | "source": [
|
2820 |
| - "There are 2 objects constructors inside Pandas and inside `statsmodels`. There has been talks about merging the 2 into SM, but that hasn't happened yet. OLS in statsmodels allows more complex formulas:" |
| 2821 | + "The recommended way to build ordinary least squares regressions is by using `statsmodels`." |
2821 | 2822 | ]
|
2822 | 2823 | },
|
2823 | 2824 | {
|
|
2888 | 2889 | "plt.legend(loc=\"upper left\")"
|
2889 | 2890 | ]
|
2890 | 2891 | },
|
2891 |
| - { |
2892 |
| - "cell_type": "markdown", |
2893 |
| - "metadata": {}, |
2894 |
| - "source": [ |
2895 |
| - "OLS in pandas requires to pass a `y` series and an `x` series to do a fit of the form `y ~ x`. But the formula can be more complex by providing a `DataFrame` for x and reproduce a formula of the form `y ~ x1 + x2`. \n", |
2896 |
| - "\n", |
2897 |
| - "Also, OLS in pandas allows to do rolling and expanding OLS:" |
2898 |
| - ] |
2899 |
| - }, |
2900 |
| - { |
2901 |
| - "cell_type": "code", |
2902 |
| - "execution_count": null, |
2903 |
| - "metadata": { |
2904 |
| - "collapsed": false |
2905 |
| - }, |
2906 |
| - "outputs": [], |
2907 |
| - "source": [ |
2908 |
| - "from pandas.stats.api import ols as pdols" |
2909 |
| - ] |
2910 |
| - }, |
2911 |
| - { |
2912 |
| - "cell_type": "code", |
2913 |
| - "execution_count": null, |
2914 |
| - "metadata": { |
2915 |
| - "collapsed": true |
2916 |
| - }, |
2917 |
| - "outputs": [], |
2918 |
| - "source": [ |
2919 |
| - "# Same fit as above:\n", |
2920 |
| - "pd_model = pdols(y=mean_sea_level[\"mean_global\"], x=mean_sea_level[[\"northern_hem\", \"southern_hem\"]])\n", |
2921 |
| - "pd_model" |
2922 |
| - ] |
2923 |
| - }, |
2924 | 2892 | {
|
2925 | 2893 | "cell_type": "code",
|
2926 | 2894 | "execution_count": null,
|
|
3140 | 3108 | "source": [
|
3141 | 3109 | "# Not constant reads apparently. Let's downscale the frequency of the sea levels \n",
|
3142 | 3110 | "# to monthly, like the temperature reads we have:\n",
|
3143 |
| - "monthly_mean_sea_level = mean_sea_level.resample(\"MS\").to_period()\n", |
| 3111 | + "monthly_mean_sea_level = mean_sea_level.resample(\"MS\").mean().to_period()\n", |
3144 | 3112 | "monthly_mean_sea_level"
|
3145 | 3113 | ]
|
3146 | 3114 | },
|
|
3259 | 3227 | },
|
3260 | 3228 | "outputs": [],
|
3261 | 3229 | "source": [
|
3262 |
| - "model = sm.ols(\"southern_hem ~ global_temp\", data=aligned_monthly_data).fit()\n", |
3263 |
| - "model.rsquared" |
| 3230 | + "model = sm.ols(\"southern_hem ~ global_temp\", data=aligned_monthly_data)\n", |
| 3231 | + "params = model.fit()\n", |
| 3232 | + "params.rsquared" |
3264 | 3233 | ]
|
3265 | 3234 | },
|
3266 | 3235 | {
|
|
3278 | 3247 | },
|
3279 | 3248 | "outputs": [],
|
3280 | 3249 | "source": [
|
3281 |
| - "aligned_yearly_data = aligned_monthly_data.resample(\"A\")\n", |
| 3250 | + "aligned_yearly_data = aligned_monthly_data.resample(\"A\").mean()\n", |
3282 | 3251 | "aligned_yearly_data.plot()"
|
3283 | 3252 | ]
|
3284 | 3253 | },
|
|
3329 | 3298 | "source": [
|
3330 | 3299 | "import statsmodels as sm\n",
|
3331 | 3300 | "# Let's remove seasonal variations by resampling annually\n",
|
3332 |
| - "data = giss_temp_series.resample(\"A\").to_timestamp()\n", |
| 3301 | + "data = giss_temp_series.resample(\"A\").mean().to_timestamp()\n", |
3333 | 3302 | "ar_model = sm.tsa.ar_model.AR(data, freq='A')\n",
|
3334 | 3303 | "ar_res = ar_model.fit(maxlag=60, disp=True)"
|
3335 | 3304 | ]
|
|
3370 | 3339 | "source": [
|
3371 | 3340 | "# Your code here"
|
3372 | 3341 | ]
|
3373 |
| - }, |
3374 |
| - { |
3375 |
| - "cell_type": "markdown", |
3376 |
| - "metadata": {}, |
3377 |
| - "source": [ |
3378 |
| - "## Want to practice more?" |
3379 |
| - ] |
3380 |
| - }, |
3381 |
| - { |
3382 |
| - "cell_type": "markdown", |
3383 |
| - "metadata": {}, |
3384 |
| - "source": [ |
3385 |
| - "**EXERCISE (computations):** Refer to `exercises/stock_returns/stock_returns.py`" |
3386 |
| - ] |
3387 |
| - }, |
3388 |
| - { |
3389 |
| - "cell_type": "markdown", |
3390 |
| - "metadata": {}, |
3391 |
| - "source": [ |
3392 |
| - "**EXERCISE (stats, groupby, timeseries):** Refer to `exercises/pandas_wind_statistics/pandas_wind_statistics.py`" |
3393 |
| - ] |
3394 |
| - }, |
3395 |
| - { |
3396 |
| - "cell_type": "code", |
3397 |
| - "execution_count": null, |
3398 |
| - "metadata": { |
3399 |
| - "collapsed": false |
3400 |
| - }, |
3401 |
| - "outputs": [], |
3402 |
| - "source": [] |
3403 | 3342 | }
|
3404 | 3343 | ],
|
3405 | 3344 | "metadata": {
|
|
0 commit comments