Skip to content

Commit f50e5cc

Browse files
authored
Merge pull request #381 from Mottl/moex
Added MOEX data source (Moscow Exchange)
2 parents d71000d + 3ef996e commit f50e5cc

File tree

3 files changed

+200
-1
lines changed

3 files changed

+200
-1
lines changed

pandas_datareader/data.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@
66

77
from pandas_datareader.google.daily import GoogleDailyReader
88
from pandas_datareader.google.quotes import GoogleQuotesReader
9+
from pandas_datareader.google.options import Options as GoogleOptions
910

1011
from pandas_datareader.yahoo.daily import YahooDailyReader
1112
from pandas_datareader.yahoo.quotes import YahooQuotesReader
1213
from pandas_datareader.yahoo.actions import (YahooActionReader, YahooDivReader)
1314
from pandas_datareader.yahoo.components import _get_data as get_components_yahoo # noqa
1415
from pandas_datareader.yahoo.options import Options as YahooOptions
15-
from pandas_datareader.google.options import Options as GoogleOptions
1616

1717
from pandas_datareader.eurostat import EurostatReader
1818
from pandas_datareader.fred import FredReader
@@ -22,6 +22,7 @@
2222
from pandas_datareader.enigma import EnigmaReader
2323
from pandas_datareader.nasdaq_trader import get_nasdaq_symbols
2424
from pandas_datareader.quandl import QuandlReader
25+
from pandas_datareader.moex import MoexReader
2526

2627

2728
def get_data_fred(*args, **kwargs):
@@ -60,6 +61,10 @@ def get_data_quandl(*args, **kwargs):
6061
return QuandlReader(*args, **kwargs).read()
6162

6263

64+
def get_data_moex(*args, **kwargs):
    """Build a MoexReader from the given arguments and return its read() result.

    All positional and keyword arguments are forwarded unchanged to the
    MoexReader constructor.
    """
    reader = MoexReader(*args, **kwargs)
    return reader.read()
66+
67+
6368
def DataReader(name, data_source=None, start=None, end=None,
6469
retry_count=3, pause=0.001, session=None, access_key=None):
6570
"""
@@ -170,6 +175,10 @@ def DataReader(name, data_source=None, start=None, end=None,
170175
return QuandlReader(symbols=name, start=start, end=end,
171176
retry_count=retry_count, pause=pause,
172177
session=session).read()
178+
elif data_source == "moex":
179+
return MoexReader(symbols=name, start=start, end=end,
180+
retry_count=retry_count, pause=pause,
181+
session=session).read()
173182
else:
174183
msg = "data_source=%r is not implemented" % data_source
175184
raise NotImplementedError(msg)

pandas_datareader/moex.py

Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
# flake8: noqa
2+
3+
from pandas_datareader.base import _DailyBaseReader
4+
from pandas import read_csv, compat
5+
from pandas.compat import StringIO
6+
import datetime as dt
7+
8+
9+
class MoexReader(_DailyBaseReader):

    """
    Returns DataFrame of historical stock prices for a single symbol from
    the Moscow Exchange (MOEX) ISS service, over date range, start to end.

    Parameters
    ----------
    symbols : string
        Single stock symbol (ticker). Any non-string value is rejected with
        ValueError: support for multiple symbols is not implemented yet.
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kinds of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to retry query request.
    pause : int, default 0
        Time, in seconds, to pause between consecutive queries of chunks. If
        single value given for symbol, represents the pause between retries.
    chunksize : int, default 25
        Number of symbols to download consecutively before initiating pause.
    session : Session, default None
        requests.sessions.Session instance to be used
    """

    # Rows requested per history page; a response holding fewer rows than
    # this is treated as the last page.
    _page_limit = 100

    def __init__(self, *args, **kwargs):
        """Forward all arguments to the base reader, then validate them.

        Raises
        ------
        ValueError
            If ``symbols`` is not a single string.
        """
        super(MoexReader, self).__init__(*args, **kwargs)
        if not isinstance(self.symbols, compat.string_types):
            raise ValueError(
                "Support for multiple symbols is not yet implemented.")
        # Keep the original end value for building the 'till' parameter and
        # compare plain dates while paging through the history.
        self.start = self.start.date()
        self.end_dt = self.end
        self.end = self.end.date()
        # Resolved from the symbol's metadata on first use.
        self.__market = None
        self.__engine = None

    __url_metadata = "https://iss.moex.com/iss/securities/{symbol}.csv"
    __url_data = "https://iss.moex.com/iss/history/engines/{engine}/" \
                 "markets/{market}/securities/{symbol}.csv"

    @property
    def url(self):
        """History URL for the symbol.

        The engine and market are looked up through ``_get_metadata`` when
        they have not been resolved yet, so the property is usable before
        ``read`` has been called.
        """
        if self.__engine is None or self.__market is None:
            self.__market, self.__engine = self._get_metadata()
        return self.__url_data.format(
            engine=self.__engine,
            market=self.__market,
            symbol=self.symbols
        )

    def _get_params(self, start):
        """Return the query parameters for one history page.

        Parameters
        ----------
        start : string
            First trade date of the page, formatted as '%Y-%m-%d'.
        """
        params = {
            'iss.only': 'history',
            'iss.dp': 'point',
            'iss.df': '%Y-%m-%d',
            'iss.tf': '%H:%M:%S',
            'iss.dft': '%Y-%m-%d %H:%M:%S',
            'iss.json': 'extended',
            'callback': 'JSON_CALLBACK',
            'from': start,
            'till': self.end_dt.strftime('%Y-%m-%d'),
            'limit': self._page_limit,
            # NOTE(review): if the service treats 'start' as a zero-based
            # row offset, 1 skips the first row of every page - confirm
            # against the ISS reference before changing it.
            'start': 1,
            'sort_order': 'TRADEDATE',
            'sort_order_desc': 'asc'
        }
        return params

    def _get_metadata(self):
        """Return the ``(market, engine)`` pair for the symbol.

        The values are taken from the first non-empty line that follows the
        line starting with 'secid;boardid;' in the metadata response.

        Raises
        ------
        IOError
            If the response is empty or contains no such line.
        """
        url = self.__url_metadata.format(symbol=self.symbols)
        text = self._read_url_as_String(url)

        header_str = 'secid;boardid;'
        header_seen = False
        for line in text.splitlines():
            if line.startswith(header_str):
                header_seen = True
                continue
            if header_seen and line != '':
                fields = line.split(';')
                return fields[5], fields[7]
        service = self.__class__.__name__
        raise IOError("{} request returned no metadata: {}\n"
                      "Typo in security symbol `{}`?".format(
                          service, url, self.symbols))

    def read(self):
        """Download the history page by page and return it as a DataFrame.

        Every follow-up page is requested from the last trade date already
        downloaded, so the rows of that date come back again; lines that
        were already collected are dropped instead of being appended twice.
        Paging stops after a short page, or when a page adds nothing new
        (which also guarantees that the loop terminates).

        The reader is closed when the method exits, whether or not the
        download succeeded.
        """
        try:
            self.__market, self.__engine = self._get_metadata()

            header = None
            date_column = None
            rows = []
            seen = set()
            start = self.start
            today = dt.date.today()
            # A complete page is the CSV header plus `_page_limit` rows.
            full_page = self._page_limit + 1

            while start <= self.end and start <= today:
                params = self._get_params(start.strftime('%Y-%m-%d'))
                text = self._read_url_as_String(self.url, params)
                # The first two lines of the response precede the CSV
                # header; blank lines are dropped as well.
                page = [s for s in text.splitlines()[2:] if s.strip()]
                if not page:
                    break
                if header is None:
                    header = page[0]

                fresh = [s for s in page[1:] if s not in seen]
                if not fresh:
                    break
                seen.update(fresh)
                rows.extend(fresh)

                if len(page) < full_page:
                    break

                # Continue from the last downloaded trade date.
                if date_column is None:
                    date_column = header.split(';').index('TRADEDATE')
                last_date = rows[-1].split(';')[date_column]
                start = dt.datetime.strptime(last_date, '%Y-%m-%d').date()

            out_list = rows if header is None else [header] + rows
            str_io = StringIO('\r\n'.join(out_list))
            return self._read_lines(str_io)
        finally:
            self.close()

    def _read_url_as_String(self, url, params=None):
        """Fetch ``url`` and return the response body as text.

        A binary body is decoded as windows-1251.

        Raises
        ------
        IOError
            If the response body is empty.
        """
        response = self._get_response(url, params=params)
        text = self._sanitize_response(response)
        if len(text) == 0:
            service = self.__class__.__name__
            raise IOError("{} request returned no data; check URL for invalid "
                          "inputs: {}".format(service, url))
        if isinstance(text, compat.binary_type):
            return text.decode('windows-1251')
        return text

    def _read_lines(self, input):
        """Parse ';'-separated CSV text into a DataFrame indexed by TRADEDATE.

        '-' and 'null' are read as missing values.
        """
        rs = read_csv(input, index_col='TRADEDATE', parse_dates=True, sep=';',
                      na_values=('-', 'null'))
        # Get rid of unicode characters in index name.
        try:
            rs.index.name = rs.index.name.decode(
                'unicode_escape').encode('ascii', 'ignore')
        except AttributeError:
            # Python 3 string has no decode method.
            rs.index.name = rs.index.name.encode('ascii', 'ignore').decode()
        return rs

pandas_datareader/tests/test_moex.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
import pytest
2+
3+
from requests.exceptions import HTTPError
4+
import pandas_datareader.data as web
5+
6+
7+
class TestMoex(object):
    """Network smoke test for the "moex" data source of DataReader."""

    def test_moex_datareader(self):
        """Download one month of USD000UTSTOM and check for a SECID column.

        The test is skipped when the request fails with an HTTPError.
        """
        try:
            df = web.DataReader("USD000UTSTOM",
                                "moex",
                                start="2017-07-01",
                                end="2017-07-31")
        except HTTPError as e:
            # pytest.skip() takes the reason as a string; passing the
            # exception object itself is rejected by current pytest.
            pytest.skip(str(e))
        assert 'SECID' in df.columns

0 commit comments

Comments
 (0)