Skip to content

Commit c8b73ed

Browse files
committed
pandas tutorial
1 parent d89a737 commit c8b73ed

1 file changed

Lines changed: 86 additions & 0 deletions

File tree

Examples/Basic/pandas-tutorial.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
'''
2+
(c) 2011, 2012 Georgia Tech Research Corporation
3+
This source code is released under the New BSD license. Please see
4+
http://wiki.quantsoftware.org/index.php?title=QSTK_License
5+
for license details.
6+
7+
Created on October 4, 2013
8+
9+
@author: Sourabh Bajaj
10+
@contact: sourabhbajaj@gatech.edu
11+
@summary: Example tutorial code.
12+
'''
13+
14+
import pandas as pd
15+
import datetime as dt
16+
import numpy as np
17+
18+
## Tutorial on using Pandas in QSTK
# Build the date index used throughout the tutorial: 1-5 January 2011,
# each stamped at hour 16.
ldt_timestamps = [dt.datetime(2011, 1, day, 16) for day in range(1, 6)]

# Single-argument print(...) produces the same output on Python 2 and
# Python 3; print("") emits the blank line the bare `print` statement did.
print("The index we created has the following dates : ")
print(ldt_timestamps)
print("")
26+
27+
## TimeSeries
# pd.Series is used instead of pd.TimeSeries: the latter was only an alias
# and is no longer provided by current pandas releases. The scalar 0.0 is
# broadcast to every entry of the index.
ts_single_value = pd.Series(0.0, index=ldt_timestamps)
print("A timeseries initialized to one single value : ")
# Actually display the series that the heading announces, followed by a
# blank separator line like the other sections of this tutorial.
print(ts_single_value)
print("")
30+
31+
# Dummy data: one integer per timestamp, counting up from 0.
na_vals = np.arange(len(ldt_timestamps))
# print(...) with one argument behaves identically on Python 2 and 3;
# print("") replaces the bare `print` statement (blank line).
print("Dummy initialized array : ")
print(na_vals)
print("")
35+
36+
# Wrap the numpy array in a series indexed by the tutorial dates.
# pd.Series replaces pd.TimeSeries, an alias that current pandas no longer
# provides.
ts_array = pd.Series(na_vals, index=ldt_timestamps)
# print(...) with one argument behaves identically on Python 2 and 3;
# print("") replaces the bare `print` statement (blank line).
print("A timeseries initialized using a numpy array : ")
print(ts_array)
print("")
40+
41+
# Look up a single observation by its timestamp label.
print("Reading the timeseries for a particular date")
# The Python 2 statement `print "Date : ", x` wrote the label, a separating
# space, then str(x). The %s form reproduces exactly that text and also
# runs on Python 3 (hence the two spaces after the colon).
print("Date :  %s" % (ldt_timestamps[1],))
print("Value :  %s" % (ts_array[ldt_timestamps[1]],))
print("")
45+
46+
# Ticker symbols used as the column labels of the dataframes below.
# print(...) with one argument behaves identically on Python 2 and 3;
# print("") replaces the bare `print` statement (blank line).
print("Initializing a list of symbols : ")
ls_symbols = ['AAPL', 'GOOG', 'MSFT', 'IBM']
print(ls_symbols)
print("")
50+
51+
print("Initializing a dataframe with one value : ")
# Pass the scalar straight to the constructor so every (date, symbol) cell
# is created as 0.0. This replaces building an all-missing frame and then
# calling fillna(0.0), which goes through a non-numeric intermediate on
# current pandas.
df_single = pd.DataFrame(0.0, index=ldt_timestamps, columns=ls_symbols)
# print(...) with one argument behaves identically on Python 2 and 3;
# print("") replaces the bare `print` statement (blank line).
print(df_single)
print("")
56+
57+
print("Initializing a dataframe with a numpy array : ")
# Random values, one row per timestamp and one column per symbol.
na_vals_2 = np.random.randn(len(ldt_timestamps), len(ls_symbols))
df_vals = pd.DataFrame(na_vals_2, index=ldt_timestamps, columns=ls_symbols)
# print(...) with one argument behaves identically on Python 2 and 3;
# print("") replaces the bare `print` statement (blank line).
print(df_vals)
print("")
62+
63+
# Three ways of slicing df_vals: by column, by row, and a single cell.
# print(...) with one argument behaves identically on Python 2 and 3;
# print("") replaces the bare `print` statement (blank line).
print("Access the timeseries of a particular symbol : ")
print(df_vals[ls_symbols[1]])
print("")

# Row selection by label uses .loc; the .ix indexer used previously has
# been removed from current pandas.
print("Access the timeseries of a particular date : ")
print(df_vals.loc[ldt_timestamps[1]])
print("")

# A (row label, column label) pair fetches one cell in a single lookup.
print("Access the value for a specific symbol on a specific date: ")
print(df_vals.loc[ldt_timestamps[1], ls_symbols[1]])
print("")
74+
75+
print("Reindexing the dataframe")
# The new labels only partly overlap the existing ones: 7 January and
# 'XOM' are not present in df_vals.
ldt_new_dates = [
    dt.datetime(2011, 1, 3, 16),
    dt.datetime(2011, 1, 5, 16),
    dt.datetime(2011, 1, 7, 16),
]
ls_new_symbols = ['AAPL', 'IBM', 'XOM']
df_new = df_vals.reindex(index=ldt_new_dates, columns=ls_new_symbols)
# print(...) with one argument behaves identically on Python 2 and 3;
# print("") replaces the bare `print` statement (blank line).
print(df_new)
print("Observe that reindex carried over whatever values it could find and set the rest to NAN")
print("")
84+
85+
# Closing pointer to further reading; the call form runs on Python 2 and 3.
print("For pandas rolling statistics please refer : http://pandas.pydata.org/pandas-docs/dev/computation.html#moving-rolling-statistics-moments")
86+

0 commit comments

Comments
 (0)