diff --git a/scripts/browse.py b/scripts/browse.py index 510f9c29d5f3..0fda3d7b06ab 100644 --- a/scripts/browse.py +++ b/scripts/browse.py @@ -6,7 +6,7 @@ cfg = Config() def scrape_text(url): - response = requests.get(url) + response = requests.get(url, headers=cfg.user_agent_header) # Check if the response contains an HTTP error if response.status_code >= 400: @@ -40,7 +40,7 @@ def format_hyperlinks(hyperlinks): def scrape_links(url): - response = requests.get(url) + response = requests.get(url, headers=cfg.user_agent_header) # Check if the response contains an HTTP error if response.status_code >= 400: diff --git a/scripts/config.py b/scripts/config.py index 766cb94f415f..4a4497694566 100644 --- a/scripts/config.py +++ b/scripts/config.py @@ -39,6 +39,11 @@ def __init__(self): self.google_api_key = os.getenv("GOOGLE_API_KEY") self.custom_search_engine_id = os.getenv("CUSTOM_SEARCH_ENGINE_ID") + # User-Agent header to send when browsing the web. + # Some websites reject requests outright with an error code when no User-Agent header is sent. + self.user_agent_header = {"User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36"} + + # Initialize the OpenAI API client openai.api_key = self.openai_api_key