Skip to content

Commit 7cd1a8d

Browse files
Merge pull request #157 from e2thenegpii/master
Added webscraper for Thrift Savings Plan
2 parents 47c286c + 315d175 commit 7cd1a8d

File tree

8 files changed

+183
-4
lines changed

8 files changed

+183
-4
lines changed

docs/source/remote_data.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Currently the following sources are supported:
3030
- :ref:`World Bank<remote_data.wb>`
3131
- :ref:`OECD<remote_data.oecd>`
3232
- :ref:`Eurostat<remote_data.eurostat>`
33+
- :ref:`Thrift Savings Plan<remote_data.tsp>`
3334

3435
It should be noted that various sources support different kinds of data, so not all sources implement the same methods, and the data elements returned might also differ.
3536

@@ -422,3 +423,16 @@ reconnect after waiting a few minutes.
422423
import pandas_datareader.data as web
423424
ed = web.DataReader('daily', 'edgar-index', '1998-05-18', '1998-05-18')
424425
ed[:5]
426+
427+
.. _remote_data.tsp:
428+
429+
TSP Fund Data
430+
431+
Download mutual fund index prices for the Thrift Savings Plan (TSP).
432+
433+
.. ipython:: python
434+
435+
import pandas_datareader.tsp as tsp
436+
tspreader = tsp.TSPReader(start='2015-10-1', end='2015-12-31')
437+
tspreader.read()
438+

docs/source/whatsnew.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ What's New
1818

1919
These are new features and improvements of note in each release.
2020

21+
.. include:: whatsnew/v0.2.3.txt
2122
.. include:: whatsnew/v0.2.2.txt
2223
.. include:: whatsnew/v0.2.1.txt
2324
.. include:: whatsnew/v0.2.0.txt

docs/source/whatsnew/v0.2.3.txt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
.. _whatsnew_023:
2+
3+
v0.2.3 (XXX)
4+
----------------------------
5+
6+
This is a minor release from 0.2.2 and includes new features.
7+
8+
9+
Highlights include:
10+
11+
12+
.. contents:: What's new in v0.2.3
13+
:local:
14+
:backlinks: none
15+
16+
.. _whatsnew_023.enhancements:
17+
18+
New features
19+
~~~~~~~~~~~~
20+
21+
- ``DataReader`` now supports pulling data for the Thrift Savings Plan (TSP).
22+
23+
.. _whatsnew_023.api_breaking:
24+
25+
Backwards incompatible API changes
26+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
27+
28+
.. _whatsnew_023.bug_fixes:
29+
30+
Bug Fixes
31+
~~~~~~~~~

pandas_datareader/base.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,14 +91,22 @@ def _read_url_as_StringIO(self, url, params=None):
9191
Open url (and retry)
9292
"""
9393
response = self._get_response(url, params=params)
94+
text = self._sanitize_response(response)
9495
out = StringIO()
95-
if isinstance(response.content, compat.binary_type):
96-
out.write(bytes_to_str(response.content))
96+
if isinstance(text, compat.binary_type):
97+
out.write(bytes_to_str(text))
9798
else:
98-
out.write(response.content)
99+
out.write(text)
99100
out.seek(0)
100101
return out
101102

103+
@staticmethod
104+
def _sanitize_response(response):
105+
"""
106+
Hook to allow subclasses to clean up response data
107+
"""
108+
return response.content
109+
102110
def _get_response(self, url, params=None):
103111
""" send raw HTTP request to get requests.Response from the specified url
104112
Parameters

pandas_datareader/tests/test_base.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import nose
2+
import pandas.util.testing as tm
3+
import pandas_datareader.base as base
4+
5+
6+
class TestBaseReader(tm.TestCase):
    """Checks on ``_BaseReader`` argument validation and unimplemented hooks."""

    def test_valid_retry_count(self):
        # Neither a non-numeric nor a negative retry count is accepted.
        for bad_count in ('stuff', -1):
            with tm.assertRaises(ValueError):
                base._BaseReader([], retry_count=bad_count)

    def test_invalid_url(self):
        # Accessing ``url`` on the bare base reader is expected to raise.
        with tm.assertRaises(NotImplementedError):
            base._BaseReader([]).url

    def test_invalid_format(self):
        # Reading with an unrecognised ``_format`` is expected to raise.
        with tm.assertRaises(NotImplementedError):
            reader = base._BaseReader([])
            reader._format = 'IM_NOT_AN_IMPLEMENTED_TYPE'
            reader._read_one_data('a', None)
22+
23+
24+
class TestDailyBaseReader(tm.TestCase):
    """Checks on the unimplemented hooks of ``_DailyBaseReader``."""

    def test_get_params(self):
        # ``_get_params`` on the bare daily reader is expected to raise.
        with tm.assertRaises(NotImplementedError):
            daily_reader = base._DailyBaseReader()
            daily_reader._get_params()
29+
30+
if __name__ == '__main__':
    # Allow this test module to be run directly through nose.
    nose.runmodule(
        exit=False,
        argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
    )

pandas_datareader/tests/test_tsp.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import nose
2+
import pandas.util.testing as tm
3+
import datetime as dt
4+
5+
import pandas_datareader.tsp as tsp
6+
7+
8+
class TestTSPFunds(tm.TestCase):
    """Tests for ``tsp.TSPReader``."""

    def test_get_allfunds(self):
        # Request a single day (2015-11-02) using the reader's default funds.
        tspdata = tsp.TSPReader(start='2015-11-2', end='2015-11-2').read()

        # The length check must wrap only ``tspdata``.  The previous form,
        # ``len(tspdata == 1)``, measured the elementwise comparison result
        # and therefore passed for any non-empty frame.
        assert len(tspdata) == 1

        assert round(tspdata['I Fund'][dt.date(2015, 11, 2)], 5) == 25.0058

    def test_sanitize_response(self):
        # Minimal stand-in exposing only the ``text`` attribute that
        # ``_sanitize_response`` reads.
        class response(object):
            pass
        r = response()

        # Whitespace is stripped and the single trailing comma removed.
        r.text = ' , '
        ret = tsp.TSPReader._sanitize_response(r)
        assert ret == ''

        # Commas that are not at the end are left alone.
        r.text = ' a,b '
        ret = tsp.TSPReader._sanitize_response(r)
        assert ret == 'a,b'
26+
27+
if __name__ == '__main__':
    # Allow this test module to be run directly through nose.
    nose.runmodule(
        exit=False,
        argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
    )

pandas_datareader/tsp.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from pandas_datareader.base import _BaseReader
2+
3+
4+
class TSPReader(_BaseReader):

    """
    Returns DataFrame of historical TSP fund prices from symbols, over date
    range, start to end.

    Parameters
    ----------
    symbols : string, array-like object (list, tuple, Series), or DataFrame
        Single fund symbol, array-like object of symbols or
        DataFrame with index containing symbols. When omitted (``None``),
        the funds 'Linc', 'L2020', 'L2030', 'L2040', 'L2050', 'G', 'F',
        'C', 'S' and 'I' are requested.
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kinds of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to retry query request.
    pause : float, default 0.001
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
    session : Session, default None
        requests.sessions.Session instance to be used
    """

    # Funds requested when the caller does not supply ``symbols``.  Stored as
    # a tuple so the default can never be mutated and shared between
    # instances (the pitfall of a list literal used as a default argument).
    _DEFAULT_SYMBOLS = ('Linc', 'L2020', 'L2030', 'L2040', 'L2050',
                        'G', 'F', 'C', 'S', 'I')

    def __init__(self, symbols=None, start=None, end=None, retry_count=3,
                 pause=0.001, session=None):
        if symbols is None:
            # Hand the base class a fresh list on every call.
            symbols = list(self._DEFAULT_SYMBOLS)
        super(TSPReader, self).__init__(symbols=symbols,
                                        start=start, end=end,
                                        retry_count=retry_count,
                                        pause=pause, session=session)
        self._format = 'string'

    @property
    def url(self):
        """ URL of the TSP fund performance page that is queried """
        return 'https://www.tsp.gov/InvestmentFunds/FundPerformance/index.html'

    def read(self):
        """ read the data and strip whitespace from the column labels """
        df = super(TSPReader, self).read()
        # Build a concrete list: on Python 3 ``map`` returns a lazy iterator.
        df.columns = [label.strip() for label in df.columns]
        return df

    @property
    def params(self):
        """
        Query parameters for the request: start and end dates formatted as
        MM/DD/YYYY, the requested funds, and the CSV output selector.
        """
        return {'startdate': self.start.strftime('%m/%d/%Y'),
                'enddate': self.end.strftime('%m/%d/%Y'),
                'fundgroup': self.symbols,
                'whichButton': 'CSV'}

    @staticmethod
    def _sanitize_response(response):
        """
        Clean up the response string

        Strips surrounding whitespace from ``response.text`` and removes a
        single trailing comma, if one is present.
        """
        text = response.text.strip()
        # ``endswith`` is safe on an empty string, whereas indexing
        # ``text[-1]`` raises IndexError when the response is blank.
        if text.endswith(','):
            return text[:-1]
        return text

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def readme():
2424
return f.read()
2525

2626
INSTALL_REQUIRES = (
27-
['pandas', 'requests', 'requests-file', 'requests-ftp']
27+
['pandas', 'requests>=2.3.0', 'requests-file', 'requests-ftp']
2828
)
2929

3030
setup(

0 commit comments

Comments
 (0)