@@ -58,6 +58,10 @@ def DataReader(name, data_source=None, start=None, end=None,
58
58
return get_data_yahoo (symbols = name , start = start , end = end ,
59
59
adjust_price = False , chunk = 25 ,
60
60
retry_count = retry_count , pause = pause )
61
+ elif (data_source == "google" ):
62
+ return get_data_google (symbols = name , start = start , end = end ,
63
+ adjust_price = False , chunk = 25 ,
64
+ retry_count = retry_count , pause = pause )
61
65
elif (data_source == "fred" ):
62
66
return get_data_fred (name = name , start = start , end = end )
63
67
elif (data_source == "famafrench" ):
@@ -132,6 +136,56 @@ def get_quote_yahoo(symbols):
132
136
return DataFrame (data , index = idx )
133
137
134
138
139
def get_quote_google(symbols):
    """
    Get current quotes for one or more symbols.

    NOTE(review): despite its name, this function builds a request against
    ``finance.yahoo.com/d/quotes.csv``; no Google endpoint is contacted.

    Parameters
    ----------
    symbols : string or array-like object (list, tuple, Series)
        A single ticker, or several tickers. Multiple tickers are joined
        with '+' to build the query string.

    Returns
    -------
    DataFrame indexed by symbol with the columns 'last', 'change_pct',
    'PE', 'time' and 'short_ratio', or None when the download fails (the
    error is printed rather than raised).
    """
    if isinstance(symbols, str):
        sym_list = symbols
    else:
        if not isinstance(symbols, Series):
            symbols = Series(symbols)
        sym_list = '+'.join(symbols)

    # for codes see: http://www.gummy-stuff.org/Yahoo-data.htm
    # Kept as an ordered list of pairs so that the requested field order and
    # the column each CSV field is stored under can never get out of step.
    codes = [('symbol', 's'), ('last', 'l1'), ('change_pct', 'p2'),
             ('PE', 'r'), ('time', 't1'), ('short_ratio', 's7')]
    header = [name for name, _ in codes]
    request = ''.join(code for _, code in codes)  # code request string

    data = dict((name, []) for name in header)

    urlStr = 'http://finance.yahoo.com/d/quotes.csv?s=%s&f=%s' % (
        sym_list, request)

    try:
        lines = urllib2.urlopen(urlStr).readlines()
    except Exception as e:
        s = "Failed to download:\n {0}".format(e)
        print(s)
        return None

    for line in lines:
        text = line.decode('utf-8').strip()
        if not text:
            # A blank line carries no quote; skipping it keeps all columns
            # the same length.
            continue
        for i, field in enumerate(text.split(',')):
            if field.endswith('%"'):
                # Quoted percentage such as "+0.53%"
                value = float(field.strip('"%'))
            elif field.startswith('"'):
                # Quoted text field
                value = field.strip('"')
            else:
                # Bare numeric field; empty or unparsable becomes NaN
                try:
                    value = float(field)
                except ValueError:
                    value = np.nan
            data[header[i]].append(value)

    idx = data.pop('symbol')

    return DataFrame(data, index=idx)
187
+
188
+
135
189
def _get_hist_yahoo (sym = None , start = None , end = None , retry_count = 3 ,
136
190
pause = 0 , ** kwargs ):
137
191
"""
@@ -178,6 +232,52 @@ def _get_hist_yahoo(sym=None, start=None, end=None, retry_count=3,
178
232
"return a 200 for url %s" % (pause , url ))
179
233
180
234
235
def _get_hist_google(sym=None, start=None, end=None, retry_count=3,
                     pause=0, **kwargs):
    """
    Get historical daily data for the given symbol.
    Date format is datetime

    NOTE(review): despite its name, this function requests
    ``ichart.yahoo.com/table.csv``; no Google endpoint is contacted.

    Parameters
    ----------
    sym : string
        Symbol to download. If None, a warning is issued and None is
        returned.
    start, end : passed through ``_sanitize_dates`` before use
    retry_count : int, default 3
        Maximum number of requests to attempt.
    pause : int, default 0
        Seconds to sleep after an attempt that did not return status 200.
    **kwargs
        Accepted and ignored.

    Returns a DataFrame (rows reversed relative to the downloaded CSV).

    Raises
    ------
    Exception
        If no attempt returned status 200. Errors raised by
        ``urllib2.urlopen`` itself are not caught here and propagate.
    """
    if sym is None:
        warnings.warn("Need to provide a name.")
        return None

    start, end = _sanitize_dates(start, end)

    yahoo_URL = 'http://ichart.yahoo.com/table.csv?'

    # Months are sent zero-based (a/d); day and year are sent as-is.
    url = (yahoo_URL + 's=%s' % sym +
           '&a=%s' % (start.month - 1) +
           '&b=%s' % start.day +
           '&c=%s' % start.year +
           '&d=%s' % (end.month - 1) +
           '&e=%s' % end.day +
           '&f=%s' % end.year +
           '&g=d' +
           '&ignore=.csv')

    for _ in range(retry_count):
        resp = urllib2.urlopen(url)
        try:
            if resp.code == 200:
                lines = resp.read()
                rs = read_csv(StringIO(bytes_to_str(lines)), index_col=0,
                              parse_dates=True)[::-1]

                # Yahoo! Finance sometimes does this awesome thing where they
                # return 2 rows for the most recent business day
                if len(rs) > 2 and rs.index[-1] == rs.index[-2]:  # pragma: no cover
                    rs = rs[:-1]

                return rs
        finally:
            # Release the connection whether or not the attempt succeeded.
            resp.close()

        time.sleep(pause)

    # Report the number of attempts made (previously `pause` was formatted
    # here by mistake).
    raise Exception("after %d tries, Yahoo did not "
                    "return a 200 for url %s" % (retry_count, url))
279
+
280
+
181
281
def _adjust_prices (hist_data , price_list = ['Open' , 'High' , 'Low' , 'Close' ]):
182
282
"""
183
283
Return modifed DataFrame or Panel with adjusted prices based on
@@ -347,6 +447,84 @@ def dl_mult_symbols(symbols):
347
447
348
448
return hist_data
349
449
450
def get_data_google(symbols=None, start=None, end=None, retry_count=3, pause=0,
                    adjust_price=False, ret_index=False, chunksize=25,
                    **kwargs):
    """
    Returns DataFrame/Panel of historical stock prices from symbols, over date
    range, start to end. To avoid being penalized by the data provider's
    servers, pauses between downloading 'chunks' of symbols can be specified.

    Parameters
    ----------
    symbols : string, array-like object (list, tuple, Series), or DataFrame
        Single stock symbol (ticker), array-like object of symbols or
        DataFrame with index containing stock symbols.
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kind of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to retry query request. Forwarded to
        ``_get_hist_google``.
    pause : int, default 0
        Time, in seconds, to pause between consecutive queries of chunks.
        Also forwarded to ``_get_hist_google`` as the pause between retries.
    adjust_price : bool, default False
        If True, adjusts all prices in hist_data ('Open', 'High', 'Low', 'Close')
        based on 'Adj Close' price. Adds 'Adj_Ratio' column and drops
        'Adj Close'.
    ret_index : bool, default False
        If True, includes a simple return index 'Ret_Index' in hist_data.
    chunksize : int, default 25
        Number of symbols to download consecutively before initiating pause.
    **kwargs
        ``name`` is accepted as a deprecated alias for ``symbols``. Any
        other keyword is forwarded to ``_get_hist_google`` when several
        symbols are downloaded.

    Returns
    -------
    hist_data : DataFrame (str) or Panel (array-like object, DataFrame)
    """

    def dl_mult_symbols(symbols):
        # Download each symbol in turn and sleep `pause` seconds after every
        # chunk; a symbol whose download fails is skipped with a warning.
        stocks = {}
        for sym_group in _in_chunks(symbols, chunksize):
            for sym in sym_group:
                try:
                    stocks[sym] = _get_hist_google(sym, start=start,
                                                   end=end,
                                                   retry_count=retry_count,
                                                   pause=pause, **kwargs)
                except Exception:
                    # Not a bare except: Ctrl-C / SystemExit must still stop
                    # the download. %-formatting also copes with non-str
                    # symbols.
                    warnings.warn('Error with sym: %s... skipping.' % sym)

            time.sleep(pause)

        return Panel(stocks).swapaxes('items', 'minor')

    if 'name' in kwargs:
        warnings.warn("Arg 'name' is deprecated, please use 'symbols' instead.",
                      FutureWarning)
        # pop so the deprecated alias is not forwarded any further
        symbols = kwargs.pop('name')

    # If a single symbol, (e.g., 'GOOG')
    if isinstance(symbols, (str, int)):
        sym = symbols
        hist_data = _get_hist_google(sym, start=start, end=end,
                                     retry_count=retry_count, pause=pause)
    # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT'])
    elif isinstance(symbols, DataFrame):
        hist_data = dl_mult_symbols(Series(symbols.index))
    else:  # Guess a Series
        try:
            hist_data = dl_mult_symbols(symbols)
        except TypeError:
            hist_data = dl_mult_symbols(Series(symbols))

    if ret_index:
        hist_data['Ret_Index'] = _calc_return_index(hist_data['Adj Close'])
    if adjust_price:
        hist_data = _adjust_prices(hist_data)

    return hist_data
350
528
351
529
def get_data_fred (name = None , start = dt .datetime (2010 , 1 , 1 ),
352
530
end = dt .datetime .today ()):
0 commit comments