Skip to content

BUG: Correct behavior of yahoo #883

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 12, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions pandas_datareader/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class _BaseReader(object):
pause : float, default 0.1
Time, in seconds, of the pause between retries.
session : Session, default None
requests.sessions.Session instance to be used
requests.sessions.Session instance to be used.
freq : {str, None}
Frequency to use in select readers
"""
Expand Down Expand Up @@ -72,6 +72,7 @@ def __init__(
self.pause_multiplier = 1
self.session = _init_session(session, retry_count)
self.freq = freq
self.headers = None

def close(self):
"""Close network session"""
Expand Down Expand Up @@ -148,7 +149,10 @@ def _get_response(self, url, params=None, headers=None):
parameters passed to the URL
"""

# initial attempt + retry
# Use default headers if not passed and not using a user session
if headers is None:
headers = self.headers

pause = self.pause
last_response_text = ""
for _ in range(self.retry_count + 1):
Expand Down
37 changes: 24 additions & 13 deletions pandas_datareader/yahoo/daily.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ class YahooDailyReader(_DailyBaseReader):
Time, in seconds, to pause between consecutive queries of chunks. If
single value given for symbol, represents the pause between retries.
session : Session, default None
requests.sessions.Session instance to be used
requests.sessions.Session instance to be used. Passing a session
is an advanced usage and you must either set the required
headers in the session directly or explicitly override
using the ``headers`` argument.
adjust_price : bool, default False
If True, adjusts all prices in hist_data ('Open', 'High', 'Low',
'Close') based on 'Adj Close' price. Adds 'Adj_Ratio' column and drops
Expand All @@ -50,6 +53,9 @@ class YahooDailyReader(_DailyBaseReader):
If True, adds Dividend and Split columns to dataframe.
adjust_dividends : bool, default True
If True, adjusts dividends for splits.
headers : dict, optional
Headers to use when reading data. If None (the default), a
standard set of headers is used.
"""

def __init__(
Expand All @@ -66,6 +72,7 @@ def __init__(
interval="d",
get_actions=False,
adjust_dividends=True,
headers=None,
):
super(YahooDailyReader, self).__init__(
symbols=symbols,
Expand All @@ -80,17 +87,21 @@ def __init__(
# Ladder up the wait time between subsequent requests to improve
# probability of a successful retry
self.pause_multiplier = 2.5

self.headers = {
"Connection": "keep-alive",
"Expires": str(-1),
"Upgrade-Insecure-Requests": str(1),
# Google Chrome:
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"
),
}
if headers is not None:
self.headers = headers
elif session is None:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for this.

self.headers = {
"Connection": "keep-alive",
"Expires": str(-1),
"Upgrade-Insecure-Requests": str(1),
# Google Chrome:
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
),
}
else:
self.headers = None

self.adjust_price = adjust_price
self.ret_index = ret_index
Expand Down Expand Up @@ -150,7 +161,7 @@ def _read_one_data(self, url, params):
del params["symbol"]
url = url.format(symbol)

resp = self._get_response(url, params=params)
resp = self._get_response(url, params=params, headers=self.headers)
ptrn = r"root\.App\.main = (.*?);\n}\(this\)\);"
try:
j = json.loads(re.search(ptrn, resp.text, re.DOTALL).group(1))
Expand Down