@@ -27,12 +27,9 @@ def _download_file(
2727 - url: the URL of the file to download
2828 - localDir: Local directory to download file to. If None, the databus folder structure is created in the current working directory.
2929 - vault_token_file: Path to Vault refresh token file
30+ - databus_key: Databus API key for protected downloads
3031 - auth_url: Keycloak token endpoint URL
3132 - client_id: Client ID for token exchange
32-
33- Steps:
34- 1. Try direct GET without Authorization header.
35- 2. If server responds with WWW-Authenticate: Bearer, 401 Unauthorized), then fetch Vault access token and retry with Authorization header.
3633 """
3734 if localDir is None :
3835 _host , account , group , artifact , version , file = (
@@ -54,7 +51,18 @@ def _download_file(
5451 if dirpath :
5552 os .makedirs (dirpath , exist_ok = True ) # Create the necessary directories
5653 # --- 1. Get redirect URL by requesting HEAD ---
57- response = requests .head (url , stream = True , timeout = 30 )
54+ headers = {}
55+ # --- 1a. public databus ---
56+ response = requests .head (url , timeout = 30 )
57+ # --- 1b. Databus API key required ---
58+ if response .status_code == 401 :
59+ # print(f"API key required for {url}")
60+ if not databus_key :
61+ raise ValueError ("Databus API key not given for protected download" )
62+
63+ headers = {"X-API-KEY" : databus_key }
64+ response = requests .head (url , headers = headers , timeout = 30 )
65+
5866 # Check for redirect and update URL if necessary
5967 if response .headers .get ("Location" ) and response .status_code in [
6068 301 ,
@@ -66,33 +74,26 @@ def _download_file(
6674 url = response .headers .get ("Location" )
6775 print ("Redirects url: " , url )
6876
69- # --- 2. Try direct GET ---
70- response = requests .get (url , stream = True , allow_redirects = True , timeout = 30 )
77+ # --- 2. Try direct GET to redirected URL ---
78+ headers ["Accept-Encoding" ] = "identity" # disable gzip to get correct content-length
79+ response = requests .get (url , headers = headers , stream = True , allow_redirects = True , timeout = 30 )
7180 www = response .headers .get (
7281 "WWW-Authenticate" , ""
73- ) # get WWW-Authenticate header if present to check for Bearer auth
82+ ) # Check if authentication is required
7483
75- # Vault token required if 401 Unauthorized with Bearer challenge
84+ # --- 3. If the GET is answered with 401 Unauthorized and a Bearer challenge, get a Vault token and retry ---
7685 if response .status_code == 401 and "bearer" in www .lower ():
7786 print (f"Authentication required for { url } " )
7887 if not (vault_token_file ):
7988 raise ValueError ("Vault token file not given for protected download" )
8089
81- # --- 3 . Fetch Vault token ---
90+ # --- 3a . Fetch Vault token ---
8291 # TODO: cache token
8392 vault_token = __get_vault_access__ (url , vault_token_file , auth_url , client_id )
84- headers = {"Authorization" : f"Bearer { vault_token } " }
93+ headers ["Authorization" ] = f"Bearer { vault_token } "
94+ headers .pop ("Accept-Encoding" )
8595
86- # --- 4. Retry with token ---
87- response = requests .get (url , headers = headers , stream = True , timeout = 30 )
88-
89- # Databus API key required if only 401 Unauthorized
90- elif response .status_code == 401 :
91- print (f"API key required for { url } " )
92- if not databus_key :
93- raise ValueError ("Databus API key not given for protected download" )
94-
95- headers = {"X-API-KEY" : databus_key }
96+ # --- 3b. Retry with token ---
9697 response = requests .get (url , headers = headers , stream = True , timeout = 30 )
9798
9899 try :
@@ -104,6 +105,7 @@ def _download_file(
104105 else :
105106 raise e
106107
108+ # --- 4. Download with progress bar ---
107109 total_size_in_bytes = int (response .headers .get ("content-length" , 0 ))
108110 block_size = 1024 # 1 KiB
109111
@@ -114,12 +116,9 @@ def _download_file(
114116 file .write (data )
115117 progress_bar .close ()
116118
117- # TODO: keep check or remove?
119+ # --- 5. Verify download size ---
118120 if total_size_in_bytes != 0 and progress_bar .n != total_size_in_bytes :
119- localsize = os .path .getsize (filename )
120- print (f"\n Headers: { response .headers } " )
121- print (f"\n [WARNING]: Downloaded size { progress_bar .n } does not match Content-Length header { total_size_in_bytes } ( local file size: { localsize } )" )
122- # raise IOError("Downloaded size does not match Content-Length header")
121+ raise IOError ("Downloaded size does not match Content-Length header" )
123122
124123
125124def _download_files (
0 commit comments