Skip to content

Commit ef3e0aa

Browse files
committed
fix: wrong encoding caused wrong content length
1 parent f1beb0c commit ef3e0aa

File tree

1 file changed

+25
-26
lines changed

1 file changed

+25
-26
lines changed

databusclient/api/download.py

Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,9 @@ def _download_file(
2727
- url: the URL of the file to download
2828
- localDir: Local directory to download file to. If None, the databus folder structure is created in the current working directory.
2929
- vault_token_file: Path to Vault refresh token file
30+
- databus_key: Databus API key for protected downloads
3031
- auth_url: Keycloak token endpoint URL
3132
- client_id: Client ID for token exchange
32-
33-
Steps:
34-
1. Try direct GET without Authorization header.
35-
2. If the server responds with 401 Unauthorized (WWW-Authenticate: Bearer), then fetch a Vault access token and retry with an Authorization header.
3633
"""
3734
if localDir is None:
3835
_host, account, group, artifact, version, file = (
@@ -54,7 +51,18 @@ def _download_file(
5451
if dirpath:
5552
os.makedirs(dirpath, exist_ok=True) # Create the necessary directories
5653
# --- 1. Get redirect URL by requesting HEAD ---
57-
response = requests.head(url, stream=True, timeout=30)
54+
headers = {}
55+
# --- 1a. public databus ---
56+
response = requests.head(url, timeout=30)
57+
# --- 1b. Databus API key required ---
58+
if response.status_code == 401:
59+
# print(f"API key required for {url}")
60+
if not databus_key:
61+
raise ValueError("Databus API key not given for protected download")
62+
63+
headers = {"X-API-KEY": databus_key}
64+
response = requests.head(url, headers=headers, timeout=30)
65+
5866
# Check for redirect and update URL if necessary
5967
if response.headers.get("Location") and response.status_code in [
6068
301,
@@ -66,33 +74,26 @@ def _download_file(
6674
url = response.headers.get("Location")
6775
print("Redirects url: ", url)
6876

69-
# --- 2. Try direct GET ---
70-
response = requests.get(url, stream=True, allow_redirects=True, timeout=30)
77+
# --- 2. Try direct GET to redirected URL ---
78+
headers["Accept-Encoding"] = "identity" # disable gzip to get correct content-length
79+
response = requests.get(url, headers=headers, stream=True, allow_redirects=True, timeout=30)
7180
www = response.headers.get(
7281
"WWW-Authenticate", ""
73-
) # get WWW-Authenticate header if present to check for Bearer auth
82+
) # Check if authentication is required
7483

75-
# Vault token required if 401 Unauthorized with Bearer challenge
84+
# --- 3. If redirected to authentication 401 Unauthorized, get Vault token and retry ---
7685
if response.status_code == 401 and "bearer" in www.lower():
7786
print(f"Authentication required for {url}")
7887
if not (vault_token_file):
7988
raise ValueError("Vault token file not given for protected download")
8089

81-
# --- 3. Fetch Vault token ---
90+
# --- 3a. Fetch Vault token ---
8291
# TODO: cache token
8392
vault_token = __get_vault_access__(url, vault_token_file, auth_url, client_id)
84-
headers = {"Authorization": f"Bearer {vault_token}"}
93+
headers["Authorization"] = f"Bearer {vault_token}"
94+
headers.pop("Accept-Encoding")
8595

86-
# --- 4. Retry with token ---
87-
response = requests.get(url, headers=headers, stream=True, timeout=30)
88-
89-
# Databus API key required if only 401 Unauthorized
90-
elif response.status_code == 401:
91-
print(f"API key required for {url}")
92-
if not databus_key:
93-
raise ValueError("Databus API key not given for protected download")
94-
95-
headers = {"X-API-KEY": databus_key}
96+
# --- 3b. Retry with token ---
9697
response = requests.get(url, headers=headers, stream=True, timeout=30)
9798

9899
try:
@@ -104,6 +105,7 @@ def _download_file(
104105
else:
105106
raise e
106107

108+
# --- 4. Download with progress bar ---
107109
total_size_in_bytes = int(response.headers.get("content-length", 0))
108110
block_size = 1024 # 1 KiB
109111

@@ -114,12 +116,9 @@ def _download_file(
114116
file.write(data)
115117
progress_bar.close()
116118

117-
# TODO: keep check or remove?
119+
# --- 5. Verify download size ---
118120
if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
119-
localsize = os.path.getsize(filename)
120-
print(f"\nHeaders: {response.headers}")
121-
print(f"\n[WARNING]: Downloaded size {progress_bar.n} does not match Content-Length header {total_size_in_bytes} ( local file size: {localsize})")
122-
# raise IOError("Downloaded size does not match Content-Length header")
121+
raise IOError("Downloaded size does not match Content-Length header")
123122

124123

125124
def _download_files(

0 commit comments

Comments
 (0)