Skip to content

ENH: Add support for Nasdaq symbol list #254

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 14, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions docs/source/remote_data.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Currently the following sources are supported:
- :ref:`Eurostat<remote_data.eurostat>`
- :ref:`Thrift Savings Plan<remote_data.tsp>`
- :ref:`Oanda currency historical rate<remote_data.oanda_curr_hist>`
- :ref:`Nasdaq Trader symbol definitions<remote_data.nasdaq_symbols>`

It should be noted, that various sources support different kinds of data, so not all sources implement the same methods and the data elements returned might also differ.

Expand Down Expand Up @@ -540,3 +541,33 @@ Download currency historical rate from `Oanda <https://www.oanda.com/>`__.
2016-06-01 1.115170 1.445410 0.009095

[153 rows x 3 columns]

.. _remote_data.nasdaq_symbols:

Nasdaq Trader Symbol Definitions
================================

Download the latest symbols from `Nasdaq <ftp://ftp.nasdaqtrader.com/SymbolDirectory/nasdaqtraded.txt>`__.

Note that Nasdaq updates this file daily, and historical versions are not
available. More information is available in the `field definitions <http://www.nasdaqtrader.com/trader.aspx?id=symboldirdefs>`__.

.. code-block:: python

In [12]: from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
In [13]: symbols = get_nasdaq_symbols()
In [14]: print(symbols.ix['IBM'])
Nasdaq Traded True
Security Name International Business Machines Corporation Co...
Listing Exchange N
Market Category
ETF False
Round Lot Size 100
Test Issue False
Financial Status NaN
CQS Symbol IBM
NASDAQ Symbol IBM
NextShares False
Name: IBM, dtype: object


6 changes: 6 additions & 0 deletions pandas_datareader/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pandas_datareader.edgar import EdgarIndexReader
from pandas_datareader.enigma import EnigmaReader
from pandas_datareader.oanda import get_oanda_currency_historical_rates
from pandas_datareader.nasdaq_trader import get_nasdaq_symbols


def get_data_fred(*args, **kwargs):
Expand Down Expand Up @@ -161,6 +162,11 @@ def DataReader(name, data_source=None, start=None, end=None,
quote_currency="USD", base_currency=name,
reversed=True, session=session
)
elif data_source == 'nasdaq':
if name != 'symbols':
raise ValueError("Only the string 'symbols' is supported for "
"Nasdaq, not %r" % (name,))
return get_nasdaq_symbols(retry_count=retry_count, pause=pause)
else:
msg = "data_source=%r is not implemented" % data_source
raise NotImplementedError(msg)
Expand Down
102 changes: 102 additions & 0 deletions pandas_datareader/nasdaq_trader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
from ftplib import FTP, all_errors
from pandas import read_csv
from pandas_datareader._utils import RemoteDataError
from pandas.compat import StringIO
import time

# Anonymous FTP endpoint hosting the Nasdaq Trader symbol directory.
_NASDAQ_FTP_SERVER = 'ftp.nasdaqtrader.com'
_NASDAQ_TICKER_LOC = '/SymbolDirectory/nasdaqtraded.txt'

# Field separator used inside the symbol directory file.
_DELIMITER = '|'

# (column name, Python type) pairs, in the order the columns are listed here.
# Columns typed ``bool`` hold Y/N flags in the raw file.
_TICKER_DTYPE = [
    ('Nasdaq Traded', bool),
    ('Symbol', str),
    ('Security Name', str),
    ('Listing Exchange', str),
    ('Market Category', str),
    ('ETF', bool),
    ('Round Lot Size', float),
    ('Test Issue', bool),
    ('Financial Status', str),
    ('CQS Symbol', str),
    ('NASDAQ Symbol', str),
    ('NextShares', bool),
]

# Columns that are cast to the pandas 'category' dtype after parsing.
_CATEGORICAL = ('Listing Exchange', 'Financial Status')

# Module-level cache for the parsed symbol table; None until first download.
_ticker_cache = None


def _bool_converter(item):
    """Return True when ``item`` equals the flag 'Y'; otherwise False."""
    is_yes = (item == 'Y')
    return is_yes


def _download_nasdaq_symbols(timeout):
    """
    Download and parse the Nasdaq Trader symbol directory.

    Retrieves ``_NASDAQ_TICKER_LOC`` from ``_NASDAQ_FTP_SERVER`` over FTP,
    checks that the last line is the 'File Creation Time:' footer, and parses
    the remaining delimited lines into a DataFrame indexed by the file's
    second column.

    @param timeout: the time to wait for the FTP connection
    @return: DataFrame of symbol definitions, with the Y/N flag columns
             converted to booleans and the columns named in _CATEGORICAL
             cast to the 'category' dtype
    @raise RemoteDataError: if connecting, logging in or downloading fails,
                            or if the file lacks the expected footer
    """
    # Create the session unconnected so that it is closed on every path,
    # including a failed login after a successful connect.
    ftp_session = FTP(timeout=timeout)
    lines = []
    try:
        try:
            ftp_session.connect(_NASDAQ_FTP_SERVER)
            ftp_session.login()
        except all_errors as err:
            raise RemoteDataError('Error connecting to %r: %s' %
                                  (_NASDAQ_FTP_SERVER, err))

        try:
            ftp_session.retrlines('RETR ' + _NASDAQ_TICKER_LOC, lines.append)
        except all_errors as err:
            raise RemoteDataError('Error downloading from %r: %s' %
                                  (_NASDAQ_FTP_SERVER, err))
    finally:
        ftp_session.close()

    # Sanity Checking: an empty or truncated download has no footer line.
    footer = lines[-1] if lines else None
    if footer is None or not footer.startswith('File Creation Time:'):
        raise RemoteDataError('Missing expected footer. Found %r' % (footer,))

    # Convert Y/N to True/False.
    converter_map = dict((col, _bool_converter) for col, t in _TICKER_DTYPE
                         if t is bool)
    # Columns handled by a converter are left out of the dtype mapping so that
    # no column has both a converter and a dtype specified.
    dtype_map = dict((col, t) for col, t in _TICKER_DTYPE
                     if t is not bool)

    # Drop the footer line before parsing.
    data = read_csv(StringIO('\n'.join(lines[:-1])), sep=_DELIMITER,
                    dtype=dtype_map, converters=converter_map,
                    index_col=1)

    # Properly cast enumerations
    for cat in _CATEGORICAL:
        data[cat] = data[cat].astype('category')

    return data


def get_nasdaq_symbols(retry_count=3, timeout=30, pause=None):
    """
    Get the list of all available equity symbols from Nasdaq.

    The parsed table is stored in the module-level ``_ticker_cache``; once a
    download has succeeded, later calls return the cached object without
    downloading again.

    Parameters
    ----------
    retry_count : int, default 3
        Maximum number of download attempts. At least one attempt is always
        made, even if a value below 1 is given.
    timeout : number, default 30
        Timeout passed to the FTP download. Must be >= 0.
    pause : number, optional
        Time passed to ``time.sleep`` between failed attempts. Defaults to
        one third of ``timeout``. Must be >= 0.

    Returns
    -------
    nasdaq_tickers : pandas.DataFrame
        DataFrame with company tickers, names, and other properties.

    Raises
    ------
    ValueError
        If ``timeout`` or ``pause`` is negative.
    RemoteDataError
        If every download attempt fails.
    """
    global _ticker_cache

    if timeout < 0:
        raise ValueError('timeout must be >= 0, not %r' % (timeout,))

    if pause is None:
        # Float division keeps the default identical on Python 2 and 3.
        pause = timeout / 3.0
    elif pause < 0:
        raise ValueError('pause must be >= 0, not %r' % (pause,))

    if _ticker_cache is None:
        attempts_left = max(retry_count, 1)
        while True:
            try:
                _ticker_cache = _download_nasdaq_symbols(timeout=timeout)
                break
            except RemoteDataError:
                attempts_left -= 1
                # Propagate the last failure instead of returning None, and
                # do not sleep when there is no attempt left to wait for.
                if attempts_left <= 0:
                    raise
                time.sleep(pause)

    return _ticker_cache
22 changes: 22 additions & 0 deletions pandas_datareader/tests/test_nasdaq.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import nose

import pandas.util.testing as tm

import pandas_datareader.data as web
from pandas_datareader._utils import RemoteDataError


class TestNasdaqSymbols(tm.TestCase):
    """Check the 'nasdaq' data source exposed through DataReader."""

    def test_get_symbols(self):
        """The downloaded symbol table should contain IBM in its index."""
        try:
            nasdaq_table = web.DataReader('symbols', 'nasdaq')
        except RemoteDataError as remote_err:
            # Remote failures skip the test rather than fail it.
            raise nose.SkipTest(remote_err)
        else:
            assert 'IBM' in nasdaq_table.index


if __name__ == '__main__':
    # Run this module's tests through nose when executed as a script.
    nose.runmodule(
        argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
        exit=False
    )