Skip to content

Commit

Permalink
Adding timeout for generate-providers-metadata to tackle network issues (apache#40452)
Browse files Browse the repository at this point in the history

* Adding timeout for generate-providers-metadata to tackle network issues

* updating to 60s

---------

Co-authored-by: adesai <adesai@cloudera.com>
Rate limit · GitHub

Whoa there!

You have triggered an abuse detection mechanism.

Please wait a few minutes before you try again;
in some cases this may take up to an hour.

amoghrajesh and adesai authored Jun 28, 2024
1 parent f18f484 commit addafdb
Showing 1 changed file with 15 additions and 10 deletions.
25 changes: 15 additions & 10 deletions dev/breeze/src/airflow_breeze/utils/github.py
Original file line number Diff line number Diff line change
@@ -37,30 +37,35 @@ def get_ga_output(name: str, value: Any) -> str:
return f"{output_name}={printed_value}"


def download_file_from_github(tag: str, path: str, output_file: Path, timeout: int = 60) -> bool:
    """
    Download a file from the GitHub repository of Apache Airflow.

    The file is fetched from ``raw.githubusercontent.com`` for the given tag and
    written to ``output_file``. In dry-run mode no request is made and nothing is written.

    :param tag: tag to download from
    :param path: path of the file relative to the repository root
    :param output_file: Path where the file should be downloaded
    :param timeout: timeout in seconds for the download request, default is 60 seconds
    :return: whether the file was successfully downloaded (False if the file is missing or error occurred)
    """
    import requests

    url = f"https://raw.githubusercontent.com/apache/airflow/{tag}/{path}"
    get_console().print(f"[info]Downloading {url} to {output_file}")
    if not get_dry_run():
        # Keep the ``try`` limited to the network call so that failures while
        # writing the output file are not mistaken for request errors.
        try:
            response = requests.get(url, timeout=timeout)
        except requests.Timeout:
            get_console().print(f"[error]The request to {url} timed out after {timeout} seconds.")
            return False
        except requests.RequestException as e:
            # Other network-level failures (e.g. connection errors) are reported
            # the same way as timeouts instead of propagating to the caller.
            get_console().print(f"[error]The request to {url} failed with {type(e).__name__}.")
            return False
        if response.status_code == 404:
            get_console().print(f"[warning]The {url} has not been found. Skipping")
            return False
        if response.status_code != 200:
            get_console().print(
                f"[error]{url} could not be downloaded. Status code {response.status_code}"
            )
            return False
        output_file.write_bytes(response.content)
    # NOTE(review): indentation was lost in the source this was reconstructed from;
    # the success message is assumed to sit outside the dry-run check - confirm.
    get_console().print(f"[success]Downloaded {url} to {output_file}")
    return True

0 comments on commit addafdb

Please sign in to comment.