pydata · femtotrader · Nov 25, 2015 · Nov 23, 2015
diff --git a/docs/source/whatsnew/v0.2.1.txt b/docs/source/whatsnew/v0.2.1.txt
@@ -19,11 +19,14 @@ New features
 ~~~~~~~~~~~~
 
 - ``DataReader`` now supports Eurostat data sources, see :ref:`here<remote_data.eurostat>` (:issue:`101`).
+- ``Options`` downloading is approximately 4x faster as a result of a rewrite of the parsing function (:issue:`122`).
 
 .. _whatsnew_021.api_breaking:
 
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+- ``Options`` columns ``PctChg`` and ``IV`` (Implied Volatility) are now of type float
+  rather than string (:issue:`122`).
 
 .. _whatsnew_021.bug_fixes:
 

diff --git a/pandas_datareader/base.py b/pandas_datareader/base.py
@@ -4,6 +4,7 @@
 import datetime as dt
 
 import requests
+from requests_file import FileAdapter
 
 from pandas import to_datetime
 import pandas.compat as compat
@@ -55,6 +56,7 @@ def __init__(self, symbols, start=None, end=None,
     def _init_session(self, session, retry_count):
         if session is None:
             session = requests.Session()
+            session.mount('file://', FileAdapter())
             # do not set requests max_retries here to support arbitrary pause
         return session
 
@@ -197,4 +199,4 @@ def _in_chunks(seq, size):
     """
     Return sequence in 'chunks' of size defined by size
     """
-    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
+    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
diff --git a/pandas_datareader/data.py b/pandas_datareader/data.py
@@ -128,12 +128,12 @@ def DataReader(name, data_source=None, start=None, end=None,
 
 
 
-def Options(symbol, data_source=None):
+def Options(symbol, data_source=None, session=None):
     if data_source is None:
         warnings.warn("Options(symbol) is deprecated, use Options(symbol,"
                       " data_source) instead", FutureWarning, stacklevel=2)
         data_source = "yahoo"
     if data_source == "yahoo":
-        return YahooOptions(symbol)
+        return YahooOptions(symbol, session=session)
     else:
         raise NotImplementedError("currently only yahoo supported")
diff --git a/pandas_datareader/tests/test_data.py b/pandas_datareader/tests/test_data.py
@@ -351,9 +351,9 @@ def setUpClass(cls):
             cls.month = 1
         cls.expiry = datetime(cls.year, cls.month, 1)
         cls.dirpath = tm.get_data_path()
-        cls.html1 = os.path.join(cls.dirpath, 'yahoo_options1.html')
-        cls.html2 = os.path.join(cls.dirpath, 'yahoo_options2.html')
-        cls.html3 = os.path.join(cls.dirpath, 'yahoo_options3.html') #Empty table GH#22
+        cls.html1 = 'file://' + os.path.join(cls.dirpath, 'yahoo_options1.html')
+        cls.html2 = 'file://' + os.path.join(cls.dirpath, 'yahoo_options2.html')
+        cls.html3 = 'file://' + os.path.join(cls.dirpath, 'yahoo_options3.html') #Empty table GH#22
         cls.data1 = cls.aapl._option_frames_from_url(cls.html1)['puts']
 
     @classmethod
@@ -381,7 +381,7 @@ def test_get_near_stock_price(self):
         self.assertTrue(len(options) > 1)
 
     def test_options_is_not_none(self):
-        option = web.Options('aapl')
+        option = web.Options('aapl', 'yahoo')
         self.assertTrue(option is not None)
 
     def test_get_call_data(self):

diff --git a/pandas_datareader/yahoo/options.py b/pandas_datareader/yahoo/options.py
@@ -7,20 +7,34 @@
 from pandas import concat, DatetimeIndex, Series
 from pandas.tseries.offsets import MonthEnd
 from pandas.util.testing import _network_error_classes
+from pandas.io.parsers import TextParser
+from pandas import DataFrame
 
 from pandas_datareader._utils import RemoteDataError
+from pandas_datareader.base import _BaseReader
 
 # Items needed for options class
 CUR_MONTH = dt.datetime.now().month
 CUR_YEAR = dt.datetime.now().year
 CUR_DAY = dt.datetime.now().day
 
-
 def _two_char(s):
     return '{0:0>2}'.format(s)
 
+def _unpack(row, kind='td'):
+    return [val.text_content().strip() for val in row.findall(kind)]
+
+def _parse_options_data(table):
+    header = table.findall('thead/tr')
+    header = _unpack(header[0], kind='th')
+    rows = table.findall('tbody/tr')
+    data = [_unpack(r) for r in rows]
+    if len(data) > 0:
+        return TextParser(data, names=header).get_chunk()
+    else: #Empty table
+        return DataFrame(columns=header)
 
-class Options(object):
+class Options(_BaseReader):
     """
     ***Experimental***
     This class fetches call/put data for a given stock/expiry month.
@@ -62,13 +76,13 @@ class Options(object):
     >>> all_data = aapl.get_all_data()
     """
 
-    _TABLE_LOC = {'calls': 1, 'puts': 2}
     _OPTIONS_BASE_URL = 'http://finance.yahoo.com/q/op?s={sym}'
     _FINANCE_BASE_URL = 'http://finance.yahoo.com'
 
-    def __init__(self, symbol):
+    def __init__(self, symbol, session=None):
         """ Instantiates options_data with a ticker saved as symbol """
         self.symbol = symbol.upper()
+        super(Options, self).__init__(symbols=symbol, session=session)
 
     def get_options_data(self, month=None, year=None, expiry=None):
         """
@@ -156,20 +170,19 @@ def _yahoo_url_from_expiry(self, expiry):
         return self._FINANCE_BASE_URL + expiry_links[expiry]
 
     def _option_frames_from_url(self, url):
-        frames = read_html(url)
-        nframes = len(frames)
-        frames_req = max(self._TABLE_LOC.values())
-        if nframes < frames_req:
-            raise RemoteDataError("%s options tables found (%s expected)" % (nframes, frames_req))
+
+        root = self._parse_url(url)
+        calls = root.xpath('//*[@id="optionsCallsTable"]/div[2]/div/table')[0]
+        puts = root.xpath('//*[@id="optionsPutsTable"]/div[2]/div/table')[0]
 
         if not hasattr(self, 'underlying_price'):
             try:
                 self.underlying_price, self.quote_time = self._underlying_price_and_time_from_url(url)
             except IndexError:
                 self.underlying_price, self.quote_time = np.nan, np.nan
 
-        calls = frames[self._TABLE_LOC['calls']]
-        puts = frames[self._TABLE_LOC['puts']]
+        calls = _parse_options_data(calls)
+        puts = _parse_options_data(puts)
 
         calls = self._process_data(calls, 'call')
         puts = self._process_data(puts, 'put')
@@ -648,15 +661,10 @@ def _parse_url(self, url):
         except ImportError: # pragma: no cover
             raise ImportError("Please install lxml if you want to use the "
                               "{0!r} class".format(self.__class__.__name__))
-        try:
-            doc = parse(url)
-        except _network_error_classes: # pragma: no cover
-            raise RemoteDataError("Unable to parse URL "
-                                  "{0!r}".format(url))
-        else:
-            root = doc.getroot()
-            if root is None: # pragma: no cover
-                raise RemoteDataError("Parsed URL {0!r} has no root"
+        doc = parse(self._read_url_as_StringIO(url))
+        root = doc.getroot()
+        if root is None: # pragma: no cover
+            raise RemoteDataError("Parsed URL {0!r} has no root"
                                       "element".format(url))
         return root
 
@@ -682,6 +690,8 @@ def _process_data(self, frame, type):
             frame['Underlying_Price'] = np.nan
             frame["Quote_Time"] = np.nan
         frame.rename(columns={'Open Int': 'Open_Int'}, inplace=True)
+        frame['IV'] = frame['IV'].str.replace(',','').str.strip('%').astype(float)/100
+        frame['PctChg'] = frame['PctChg'].str.replace(',','').str.strip('%').astype(float)/100
         frame['Type'] = type
         frame.set_index(['Strike', 'Expiry', 'Type', 'Symbol'], inplace=True)
 

diff --git a/setup.py b/setup.py
@@ -24,7 +24,7 @@ def readme():
         return f.read()
 
 INSTALL_REQUIRES = (
-    ['pandas', 'requests']
+    ['pandas', 'requests', 'requests-file']
 )
 
 setup(