Skip to content

ENH: Add requests session to Options #122

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 25, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/source/whatsnew/v0.2.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@ New features
~~~~~~~~~~~~

- ``DataReader`` now supports Eurostat data sources, see :ref:`here<remote_data.eurostat>` (:issue:`101`).
- ``Options`` downloading is approximately 4x faster as a result of a rewrite of the parsing function (:issue:`122`).

.. _whatsnew_021.api_breaking:

Backwards incompatible API changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- ``Options`` columns ``PctChg`` and ``IV`` (Implied Volatility) are now type float
rather than string (:issue:`122`).

.. _whatsnew_021.bug_fixes:

Expand Down
4 changes: 3 additions & 1 deletion pandas_datareader/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import datetime as dt

import requests
from requests_file import FileAdapter

from pandas import to_datetime
import pandas.compat as compat
Expand Down Expand Up @@ -55,6 +56,7 @@ def __init__(self, symbols, start=None, end=None,
# Return the session to use: the caller's own session when one is given,
# otherwise a newly created requests.Session.
# NOTE(review): retry_count is not referenced in the lines shown here.
def _init_session(self, session, retry_count):
if session is None:
session = requests.Session()
# Register an adapter for the file:// scheme on the session.
# NOTE(review): indentation is lost in this view; presumably the mount only
# applies to the session created just above -- confirm against the real file.
session.mount('file://', FileAdapter())
# do not set requests max_retries here to support arbitrary pause
return session

Expand Down Expand Up @@ -197,4 +199,4 @@ def _in_chunks(seq, size):
"""
Return sequence in 'chunks' of size defined by size
"""
return (seq[pos:pos + size] for pos in range(0, len(seq), size))
return (seq[pos:pos + size] for pos in range(0, len(seq), size))
4 changes: 2 additions & 2 deletions pandas_datareader/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,12 @@ def DataReader(name, data_source=None, start=None, end=None,



def Options(symbol, data_source=None):
def Options(symbol, data_source=None, session=None):
if data_source is None:
warnings.warn("Options(symbol) is deprecated, use Options(symbol,"
" data_source) instead", FutureWarning, stacklevel=2)
data_source = "yahoo"
if data_source == "yahoo":
return YahooOptions(symbol)
return YahooOptions(symbol, session=session)
else:
raise NotImplementedError("currently only yahoo supported")
8 changes: 4 additions & 4 deletions pandas_datareader/tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,9 @@ def setUpClass(cls):
cls.month = 1
cls.expiry = datetime(cls.year, cls.month, 1)
cls.dirpath = tm.get_data_path()
cls.html1 = os.path.join(cls.dirpath, 'yahoo_options1.html')
cls.html2 = os.path.join(cls.dirpath, 'yahoo_options2.html')
cls.html3 = os.path.join(cls.dirpath, 'yahoo_options3.html') #Empty table GH#22
cls.html1 = 'file://' + os.path.join(cls.dirpath, 'yahoo_options1.html')
cls.html2 = 'file://' + os.path.join(cls.dirpath, 'yahoo_options2.html')
cls.html3 = 'file://' + os.path.join(cls.dirpath, 'yahoo_options3.html') #Empty table GH#22
cls.data1 = cls.aapl._option_frames_from_url(cls.html1)['puts']

@classmethod
Expand Down Expand Up @@ -381,7 +381,7 @@ def test_get_near_stock_price(self):
self.assertTrue(len(options) > 1)

def test_options_is_not_none(self):
option = web.Options('aapl')
option = web.Options('aapl', 'yahoo')
self.assertTrue(option is not None)

def test_get_call_data(self):
Expand Down
50 changes: 30 additions & 20 deletions pandas_datareader/yahoo/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,34 @@
from pandas import concat, DatetimeIndex, Series
from pandas.tseries.offsets import MonthEnd
from pandas.util.testing import _network_error_classes
from pandas.io.parsers import TextParser
from pandas import DataFrame

from pandas_datareader._utils import RemoteDataError
from pandas_datareader.base import _BaseReader

# Items needed for options class
CUR_MONTH = dt.datetime.now().month
CUR_YEAR = dt.datetime.now().year
CUR_DAY = dt.datetime.now().day


def _two_char(s):
return '{0:0>2}'.format(s)

def _unpack(row, kind='td'):
return [val.text_content().strip() for val in row.findall(kind)]

def _parse_options_data(table):
    """
    Build a DataFrame from an options table element.

    Column names are read from the ``th`` cells of the first ``thead/tr``
    row; each ``tbody/tr`` row contributes one record from its ``td`` cells.

    Parameters
    ----------
    table : element exposing ``findall``

    Returns
    -------
    DataFrame
        Parsed via ``TextParser`` when rows exist, otherwise an empty frame
        that carries only the column names.
    """
    columns = _unpack(table.findall('thead/tr')[0], kind='th')
    records = [_unpack(tr) for tr in table.findall('tbody/tr')]
    if not records:  # Empty table
        return DataFrame(columns=columns)
    return TextParser(records, names=columns).get_chunk()

class Options(object):
class Options(_BaseReader):
"""
***Experimental***
This class fetches call/put data for a given stock/expiry month.
Expand Down Expand Up @@ -62,13 +76,13 @@ class Options(object):
>>> all_data = aapl.get_all_data()
"""

_TABLE_LOC = {'calls': 1, 'puts': 2}
_OPTIONS_BASE_URL = 'http://finance.yahoo.com/q/op?s={sym}'
_FINANCE_BASE_URL = 'http://finance.yahoo.com'

def __init__(self, symbol):
def __init__(self, symbol, session=None):
""" Instantiates options_data with a ticker saved as symbol """
self.symbol = symbol.upper()
super(Options, self).__init__(symbols=symbol, session=session)

def get_options_data(self, month=None, year=None, expiry=None):
"""
Expand Down Expand Up @@ -156,20 +170,19 @@ def _yahoo_url_from_expiry(self, expiry):
return self._FINANCE_BASE_URL + expiry_links[expiry]

def _option_frames_from_url(self, url):
frames = read_html(url)
nframes = len(frames)
frames_req = max(self._TABLE_LOC.values())
if nframes < frames_req:
raise RemoteDataError("%s options tables found (%s expected)" % (nframes, frames_req))

root = self._parse_url(url)
calls = root.xpath('//*[@id="optionsCallsTable"]/div[2]/div/table')[0]
puts = root.xpath('//*[@id="optionsPutsTable"]/div[2]/div/table')[0]

if not hasattr(self, 'underlying_price'):
try:
self.underlying_price, self.quote_time = self._underlying_price_and_time_from_url(url)
except IndexError:
self.underlying_price, self.quote_time = np.nan, np.nan

calls = frames[self._TABLE_LOC['calls']]
puts = frames[self._TABLE_LOC['puts']]
calls = _parse_options_data(calls)
puts = _parse_options_data(puts)

calls = self._process_data(calls, 'call')
puts = self._process_data(puts, 'put')
Expand Down Expand Up @@ -648,15 +661,10 @@ def _parse_url(self, url):
except ImportError: # pragma: no cover
raise ImportError("Please install lxml if you want to use the "
"{0!r} class".format(self.__class__.__name__))
try:
doc = parse(url)
except _network_error_classes: # pragma: no cover
raise RemoteDataError("Unable to parse URL "
"{0!r}".format(url))
else:
root = doc.getroot()
if root is None: # pragma: no cover
raise RemoteDataError("Parsed URL {0!r} has no root"
doc = parse(self._read_url_as_StringIO(url))
root = doc.getroot()
if root is None: # pragma: no cover
raise RemoteDataError("Parsed URL {0!r} has no root"
"element".format(url))
return root

Expand All @@ -682,6 +690,8 @@ def _process_data(self, frame, type):
frame['Underlying_Price'] = np.nan
frame["Quote_Time"] = np.nan
frame.rename(columns={'Open Int': 'Open_Int'}, inplace=True)
frame['IV'] = frame['IV'].str.replace(',','').str.strip('%').astype(float)/100
frame['PctChg'] = frame['PctChg'].str.replace(',','').str.strip('%').astype(float)/100
frame['Type'] = type
frame.set_index(['Strike', 'Expiry', 'Type', 'Symbol'], inplace=True)

Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def readme():
return f.read()

INSTALL_REQUIRES = (
['pandas', 'requests']
['pandas', 'requests', 'requests-file']
)

setup(
Expand Down