# flake8: noqa

from pandas_datareader.base import _DailyBaseReader
from pandas import read_csv, compat
from pandas.compat import StringIO
import datetime as dt


class MoexReader(_DailyBaseReader):
    """
    Returns DataFrame of historical stock prices from a symbol, over a date
    range, start to end. To avoid being penalized by MOEX servers, pauses
    between downloading 'chunks' of symbols can be specified.

    Parameters
    ----------
    symbols : string
        A single stock symbol (ticker). Support for multiple symbols
        (array-like objects or DataFrames) is not yet implemented.
    start : string, (defaults to '1/1/2010')
        Starting date, timestamp. Parses many different kinds of date
        representations (e.g., 'JAN-01-2010', '1/1/10', 'Jan, 1, 1980')
    end : string, (defaults to today)
        Ending date, timestamp. Same format as starting date.
    retry_count : int, default 3
        Number of times to retry query request.
    pause : int, default 0
        Time, in seconds, to pause between consecutive queries of chunks. If
        a single symbol is given, this is the pause between retries.
    chunksize : int, default 25
        Number of symbols to download consecutively before initiating pause.
    session : Session, default None
        requests.sessions.Session instance to be used
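
    Examples
    --------
    A minimal usage sketch, assuming this module is importable as
    ``pandas_datareader.moex`` and that iss.moex.com is reachable;
    'SBER' is only an illustrative ticker::

        from pandas_datareader.moex import MoexReader

        df = MoexReader('SBER', start='2017-01-01', end='2017-06-30').read()
        df.head()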
    """

    def __init__(self, *args, **kwargs):
        super(MoexReader, self).__init__(*args, **kwargs)
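        # keep the full end timestamp for building query parameters and
        # plain dates for the range comparisons inside read()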
        self.start = self.start.date()
        self.end_dt = self.end
        self.end = self.end.date()
        if not isinstance(self.symbols, compat.string_types):
            raise ValueError(
                "Support for multiple symbols is not yet implemented.")

    __url_metadata = "https://iss.moex.com/iss/securities/{symbol}.csv"
    __url_data = "https://iss.moex.com/iss/history/engines/{engine}/" \
                 "markets/{market}/securities/{symbol}.csv"

    @property
    def url(self):
        return self.__url_data.format(
            engine=self.__engine,
            market=self.__market,
            symbol=self.symbols
        )

    def _get_params(self, start):
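        """Build ISS query parameters for one page of history data.

        The window runs from ``start`` to the configured end date, and
        ``limit`` caps each response at 100 rows, which is what the paging
        loop in ``read`` relies on to detect the last page.
        """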
        params = {
            'iss.only': 'history',
            'iss.dp': 'point',
            'iss.df': '%Y-%m-%d',
            'iss.tf': '%H:%M:%S',
            'iss.dft': '%Y-%m-%d %H:%M:%S',
            'iss.json': 'extended',
            'callback': 'JSON_CALLBACK',
            'from': start,
            'till': self.end_dt.strftime('%Y-%m-%d'),
            'limit': 100,
            'start': 1,
            'sort_order': 'TRADEDATE',
            'sort_order_desc': 'asc'
        }
        return params

    def _get_metadata(self):
        """ get a market and an engine for a given symbol """
        response = self._get_response(
            self.__url_metadata.format(symbol=self.symbols)
        )
        text = self._sanitize_response(response)
        if len(text) == 0:
            service = self.__class__.__name__
            raise IOError("{} request returned no data; check URL for invalid "
                          "inputs: {}".format(
                              service,
                              self.__url_metadata.format(symbol=self.symbols)))
        if isinstance(text, compat.binary_type):
            text = text.decode('windows-1251')

        header_str = 'secid;boardid;'
        get_data = False
        for s in text.splitlines():
            if s.startswith(header_str):
                get_data = True
                continue
            if get_data and s != '':
                fields = s.split(';')
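                # in the 'boards' block of the metadata CSV, the 6th and 8th
                # fields are the market and engine ids used to build the
                # history URL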
                return fields[5], fields[7]
        service = self.__class__.__name__
        raise IOError("{} request returned no metadata: {}\n"
                      "Typo in security symbol `{}`?".format(
                          service,
                          self.__url_metadata.format(symbol=self.symbols),
                          self.symbols
                      ))

    def read(self):
        """ read data """
        try:
            self.__market, self.__engine = self._get_metadata()

            out_list = []
            date_column = None
            while True:  # read in loop with small date intervals
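                # ISS returns at most `limit` (100) rows per request, so keep
                # requesting, restarting from the last trade date already
                # received, until a short page signals that all data is in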
                if len(out_list) > 0:
                    if date_column is None:
                        date_column = out_list[0].split(';').index('TRADEDATE')

                    # get the last downloaded date
                    start_str = out_list[-1].split(';', 4)[date_column]
                    start = dt.datetime.strptime(start_str, '%Y-%m-%d').date()
                else:
                    start_str = self.start.strftime('%Y-%m-%d')
                    start = self.start

                if start >= self.end or start >= dt.date.today():
                    break

                params = self._get_params(start_str)
                strings_out = self._read_url_as_String(self.url, params) \
                    .splitlines()[2:]
                strings_out = list(filter(lambda x: x.strip(), strings_out))

                if len(out_list) == 0:
                    out_list = strings_out
                    if len(strings_out) < 101:
                        break
                else:
                    out_list += strings_out[1:]  # remove CSV head line
                    if len(strings_out) < 100:
                        break
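            # stitch the downloaded pages into a single CSV buffer and
            # parse it in one go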
            str_io = StringIO('\r\n'.join(out_list))
            df = self._read_lines(str_io)
            return df
        finally:
            self.close()

    def _read_url_as_String(self, url, params=None):
        """ Open url (and retry) """
        response = self._get_response(url, params=params)
        text = self._sanitize_response(response)
        if len(text) == 0:
            service = self.__class__.__name__
            raise IOError("{} request returned no data; check URL for invalid "
                          "inputs: {}".format(service, self.url))
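        # responses come back as windows-1251-encoded bytes, so decode them
        # before parsing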
        if isinstance(text, compat.binary_type):
            out = text.decode('windows-1251')
        else:
            out = text
        return out

    def _read_lines(self, input):
        """ return pandas DataFrame from input """
        rs = read_csv(input, index_col='TRADEDATE', parse_dates=True, sep=';',
                      na_values=('-', 'null'))
        # Get rid of unicode characters in index name.
        try:
            rs.index.name = rs.index.name.decode(
                'unicode_escape').encode('ascii', 'ignore')
        except AttributeError:
            # Python 3 string has no decode method.
            rs.index.name = rs.index.name.encode('ascii', 'ignore').decode()
        return rs