'''
(c) 2011, 2012 Georgia Tech Research Corporation
This source code is released under the New BSD license. Please see
http://wiki.quantsoftware.org/index.php?title=QSTK_License
for license details.

Created on October, 4, 2013

@author: Sourabh Bajaj
@contact: sourabhbajaj@gatech.edu
@summary: Example tutorial code.

Walks through the pandas basics used in QSTK: building a date index,
creating Series and DataFrame objects from scalars and numpy arrays,
label-based access by symbol and by date, and reindexing.
'''

import datetime as dt

import numpy as np
import pandas as pd

# NOTE: every print below is a single-argument, parenthesised call so that
# the script produces the same output under the Python 2 print statement
# and the Python 3 print function.

## Tutorial on using Pandas in QSTK
# Five consecutive days, each stamped at 16:00.
ldt_timestamps = [dt.datetime(2011, 1, day, 16) for day in range(1, 6)]

print("The index we created has the following dates : ")
print(ldt_timestamps)
print("")

## TimeSeries
# pd.TimeSeries no longer exists in pandas; pd.Series with a datetime
# index is the equivalent object.
ts_single_value = pd.Series(0.0, index=ldt_timestamps)
print("A timeseries initialized to one single value : ")
print(ts_single_value)
print("")

na_vals = np.arange(len(ldt_timestamps))
print("Dummy initialized array : ")
print(na_vals)
print("")

ts_array = pd.Series(na_vals, index=ldt_timestamps)
print("A timeseries initialized using a numpy array : ")
print(ts_array)
print("")

print("Reading the timeseries for a particular date")
# Two spaces after the colon reproduce the output of the original
# comma-separated print statements.
print("Date :  %s" % ldt_timestamps[1])
print("Value :  %s" % ts_array.loc[ldt_timestamps[1]])
print("")

print("Initializing a list of symbols : ")
ls_symbols = ['AAPL', 'GOOG', 'MSFT', 'IBM']
print(ls_symbols)
print("")

print("Initializing a dataframe with one value : ")
# Passing the scalar straight to the constructor yields a float frame
# without the intermediate all-NaN frame followed by fillna().
df_single = pd.DataFrame(0.0, index=ldt_timestamps, columns=ls_symbols)
print(df_single)
print("")

print("Initializing a dataframe with a numpy array : ")
na_vals_2 = np.random.randn(len(ldt_timestamps), len(ls_symbols))
df_vals = pd.DataFrame(na_vals_2, index=ldt_timestamps, columns=ls_symbols)
print(df_vals)
print("")

print("Access the timeseries of a particular symbol : ")
print(df_vals[ls_symbols[1]])
print("")

print("Access the timeseries of a particular date : ")
# .loc is the label-based indexer that replaces the removed .ix.
print(df_vals.loc[ldt_timestamps[1]])
print("")

print("Access the value for a specific symbol on a specific date: ")
print(df_vals.loc[ldt_timestamps[1], ls_symbols[1]])
print("")

print("Reindexing the dataframe")
# 2011-01-07 and 'XOM' are absent from df_vals, so reindex fills them
# with NaN while keeping the values it can match.
ldt_new_dates = [
    dt.datetime(2011, 1, 3, 16),
    dt.datetime(2011, 1, 5, 16),
    dt.datetime(2011, 1, 7, 16),
]
ls_new_symbols = ['AAPL', 'IBM', 'XOM']
df_new = df_vals.reindex(index=ldt_new_dates, columns=ls_new_symbols)
print(df_new)
print("Observe that reindex carried over whatever values it could find and set the rest to NAN")
print("")

print("For pandas rolling statistics please refer : http://pandas.pydata.org/pandas-docs/dev/computation.html#moving-rolling-statistics-moments")