Skip to content

Commit 8196db9

Browse files
committed
Use Google Finance as data source (test only, still pointing to Yahoo Finance)
1 parent 3ebfef9 commit 8196db9

File tree

2 files changed

+273
-0
lines changed

2 files changed

+273
-0
lines changed

pandas/io/data.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,10 @@ def DataReader(name, data_source=None, start=None, end=None,
5858
return get_data_yahoo(symbols=name, start=start, end=end,
5959
adjust_price=False, chunk=25,
6060
retry_count=retry_count, pause=pause)
61+
elif(data_source == "google"):
62+
return get_data_google(symbols=name, start=start, end=end,
63+
adjust_price=False, chunk=25,
64+
retry_count=retry_count, pause=pause)
6165
elif(data_source == "fred"):
6266
return get_data_fred(name=name, start=start, end=end)
6367
elif(data_source == "famafrench"):
@@ -132,6 +136,56 @@ def get_quote_yahoo(symbols):
132136
return DataFrame(data, index=idx)
133137

134138

139+
def get_quote_google(symbols):
140+
"""
141+
Get current yahoo quote
142+
143+
Returns a DataFrame
144+
"""
145+
if isinstance(symbols, str):
146+
sym_list = symbols
147+
elif not isinstance(symbols, Series):
148+
symbols = Series(symbols)
149+
sym_list = str.join('+', symbols)
150+
else:
151+
sym_list = str.join('+', symbols)
152+
153+
# for codes see: http://www.gummy-stuff.org/Yahoo-data.htm
154+
codes = {'symbol': 's', 'last': 'l1', 'change_pct': 'p2', 'PE': 'r',
155+
'time': 't1', 'short_ratio': 's7'}
156+
request = str.join('', codes.values()) # code request string
157+
header = codes.keys()
158+
159+
data = dict(zip(codes.keys(), [[] for i in range(len(codes))]))
160+
161+
urlStr = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (
162+
sym_list, request)
163+
164+
try:
165+
lines = urllib2.urlopen(urlStr).readlines()
166+
except Exception, e:
167+
s = "Failed to download:\n{0}".format(e)
168+
print s
169+
return None
170+
171+
for line in lines:
172+
fields = line.decode('utf-8').strip().split(',')
173+
for i, field in enumerate(fields):
174+
if field[-2:] == '%"':
175+
data[header[i]].append(float(field.strip('"%')))
176+
elif field[0] == '"':
177+
data[header[i]].append(field.strip('"'))
178+
else:
179+
try:
180+
data[header[i]].append(float(field))
181+
except ValueError:
182+
data[header[i]].append(np.nan)
183+
184+
idx = data.pop('symbol')
185+
186+
return DataFrame(data, index=idx)
187+
188+
135189
def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
136190
pause=0, **kwargs):
137191
"""
@@ -178,6 +232,52 @@ def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
178232
"return a 200 for url %s" % (pause, url))
179233

180234

235+
def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
                     pause=0, **kwargs):
    """
    Get historical data for the given symbol.
    Date format is datetime.

    NOTE: test-only implementation -- this still downloads from the
    Yahoo! Finance CSV endpoint, not Google Finance.

    Returns a DataFrame, or None if no symbol was given.
    """
    if(sym is None):
        warnings.warn("Need to provide a name.")
        return None

    start, end = _sanitize_dates(start, end)

    yahoo_URL = 'http://ichart.yahoo.com/table.csv?'

    # Yahoo's CSV API uses zero-based months (a/d) and one-based days (b/e)
    url = yahoo_URL + 's=%s' % sym + \
        '&a=%s' % (start.month - 1) + \
        '&b=%s' % start.day + \
        '&c=%s' % start.year + \
        '&d=%s' % (end.month - 1) + \
        '&e=%s' % end.day + \
        '&f=%s' % end.year + \
        '&g=d' + \
        '&ignore=.csv'

    for _ in range(retry_count):
        resp = urllib2.urlopen(url)
        if resp.code == 200:
            lines = resp.read()
            # data comes newest-first; reverse to chronological order
            rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                          parse_dates=True)[::-1]

            # Yahoo! Finance sometimes does this awesome thing where they
            # return 2 rows for the most recent business day
            if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                rs = rs[:-1]

            return rs

        time.sleep(pause)

    # BUG FIX: the message previously formatted `pause` into the
    # "%d tries" slot; the number of attempts is retry_count
    raise Exception("after %d tries, Yahoo did not "
                    "return a 200 for url %s" % (retry_count, url))
279+
280+
181281
def _adjust_prices(hist_data, price_list=['Open', 'High', 'Low', 'Close']):
182282
"""
183283
Return modifed DataFrame or Panel with adjusted prices based on
@@ -347,6 +447,84 @@ def dl_mult_symbols(symbols):
347447

348448
return hist_data
349449

450+
def get_data_google(symbols=None, start=None, end=None, retry_count=3, pause=0,
                    adjust_price=False, ret_index=False, chunksize=25,
                    **kwargs):
    """
    Returns DataFrame/Panel of historical stock prices from symbols, over date
    range, start to end. To avoid being penalized by the remote servers,
    pauses between downloading 'chunks' of symbols can be specified.

    NOTE: test-only implementation -- data is still fetched from Yahoo!
    Finance (via _get_hist_google), not Google Finance.

    Parameters
    ----------
    symbols : string, array-like object (list, tuple, Series), or DataFrame
        Single stock symbol (ticker), array-like object of symbols or
        DataFrame with index containing stock symbols.
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kind of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to retry query request.
    pause : int, default 0
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
    adjust_price : bool, default False
        If True, adjusts all prices in hist_data ('Open', 'High', 'Low', 'Close')
        based on 'Adj Close' price. Adds 'Adj_Ratio' column and drops
        'Adj Close'.
    ret_index : bool, default False
        If True, includes a simple return index 'Ret_Index' in hist_data.
    chunksize : int, default 25
        Number of symbols to download consecutively before initiating pause.

    Returns
    -------
    hist_data : DataFrame (str) or Panel (array-like object, DataFrame)
    """

    def dl_mult_symbols(symbols):
        # Download chunk by chunk, skipping (with a warning) any symbol
        # whose download fails.
        stocks = {}
        for sym_group in _in_chunks(symbols, chunksize):
            for sym in sym_group:
                try:
                    stocks[sym] = _get_hist_google(sym, start=start,
                                                   end=end, **kwargs)
                except Exception:
                    # was a bare 'except:', which also swallowed
                    # KeyboardInterrupt/SystemExit
                    warnings.warn('Error with sym: ' + sym + '... skipping.')

            time.sleep(pause)

        return Panel(stocks).swapaxes('items', 'minor')

    if 'name' in kwargs:
        warnings.warn("Arg 'name' is deprecated, please use 'symbols' instead.",
                      FutureWarning)
        symbols = kwargs['name']

    #If a single symbol, (e.g., 'GOOG')
    if isinstance(symbols, (str, int)):
        sym = symbols
        hist_data = _get_hist_google(sym, start=start, end=end)
    #Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
    elif isinstance(symbols, DataFrame):
        # (removed a no-op 'try/except ValueError: raise' wrapper here)
        hist_data = dl_mult_symbols(Series(symbols.index))
    else:  #Guess a Series
        try:
            hist_data = dl_mult_symbols(symbols)
        except TypeError:
            hist_data = dl_mult_symbols(Series(symbols))

    if(ret_index):
        # NOTE(review): 'Adj Close' is a Yahoo!-specific column; this will
        # need to change when the source really becomes Google Finance.
        hist_data['Ret_Index'] = _calc_return_index(hist_data['Adj Close'])
    if(adjust_price):
        hist_data = _adjust_prices(hist_data)

    return hist_data
350528

351529
def get_data_fred(name=None, start=dt.datetime(2010, 1, 1),
352530
end=dt.datetime.today()):

pandas/io/tests/test_google.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import unittest
2+
import nose
3+
from datetime import datetime
4+
5+
import pandas as pd
6+
import pandas.io.data as web
7+
from pandas.util.testing import (network, assert_frame_equal,
8+
assert_series_equal,
9+
assert_almost_equal)
10+
from numpy.testing.decorators import slow
11+
12+
import urllib2
13+
14+
15+
class TestGoogle(unittest.TestCase):
    """Network tests for the (test-only) Google data source in pandas.io.data."""

    @slow
    @network
    def test_google(self):
        # asserts that google is minimally working and that it throws
        # an exception when DataReader can't get a 200 response from
        # google
        start = datetime(2010, 1, 1)
        # was datetime(2013, 01, 27): leading-zero literals are octal in
        # Python 2 and a syntax error in Python 3
        end = datetime(2013, 1, 27)

        try:
            self.assertEquals(
                web.DataReader("F", 'google', start, end)['Close'][-1],
                13.68)

            self.assertRaises(
                Exception,
                lambda: web.DataReader("NON EXISTENT TICKER", 'google',
                                       start, end))
        except urllib2.URLError:
            # Distinguish "data source unreachable" from "no network at
            # all": only skip when the network itself is down.
            try:
                urllib2.urlopen('http://www.google.com')
            except urllib2.URLError:
                raise nose.SkipTest
            else:
                raise

    @slow
    @network
    def test_get_quote(self):
        # duplicate symbols should yield identical rows
        df = web.get_quote_google(pd.Series(['GOOG', 'AAPL', 'GOOG']))
        assert_series_equal(df.ix[0], df.ix[2])

    @slow
    @network
    def test_get_data(self):
        import numpy as np
        # single symbol
        df = web.get_data_google('GOOG')
        assert df.Volume.ix['OCT-08-2010'] == 2859200

        # multiple symbols
        sl = ['AAPL', 'AMZN', 'GOOG']
        pan = web.get_data_google(sl, '2012')
        ts = pan.Close.GOOG.index[pan.Close.AAPL > pan.Close.GOOG]
        assert ts[0].dayofyear == 96

        pan = web.get_data_google(['GE', 'MSFT', 'INTC'], 'JAN-01-12', 'JAN-31-12')
        expected = [19.02, 28.23, 25.39]
        result = pan.Close.ix['01-18-12'][['GE', 'MSFT', 'INTC']].tolist()
        assert result == expected

        # sanity checking
        t = np.array(result)
        assert np.issubdtype(t.dtype, np.floating)
        assert t.shape == (3,)

        expected = [[18.99, 28.4, 25.18],
                    [18.58, 28.31, 25.13],
                    [19.03, 28.16, 25.52],
                    [18.81, 28.82, 25.87]]
        result = pan.Open.ix['Jan-15-12':'Jan-20-12'][['GE', 'MSFT', 'INTC']].values
        assert (result == expected).all()

        # Check ret_index
        pan = web.get_data_google(['GE', 'INTC', 'IBM'], '1977', '1987',
                                  ret_index=True)
        tstamp = pan.Ret_Index.INTC.first_valid_index()
        result = pan.Ret_Index.ix[tstamp]['INTC']
        expected = 1.0
        assert result == expected

        # sanity checking
        t = np.array(pan)
        assert np.issubdtype(t.dtype, np.floating)
91+
92+
93+
if __name__ == '__main__':
    # Run this module's tests under nose; -x stops on first failure and
    # --pdb/--pdb-failure drop into the debugger on errors/failures.
    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                   exit=False)

0 commit comments

Comments
 (0)