11import ssl
2+ import tempfile
23from dataclasses import dataclass
34from pathlib import Path
45from typing import Dict , Optional
@@ -30,11 +31,8 @@ def _is_http_url(self, path: str) -> bool:
3031 """Check if the path is an HTTP or HTTPS URL."""
3132 return isinstance (path , str ) and path .startswith (("http://" , "https://" ))
3233
33- def _download_http (self , url : str , insecure_tls : bool = False ) -> bytes :
34- """Download a file from HTTP/HTTPS URL using aiohttp."""
35-
34+ def _download_http_to_storage (self , url : str , storage , filename : str , insecure_tls : bool = False ) -> None :
3635 async def _download ():
37- # For http:// or when insecure_tls is set, disable SSL verification
3836 parsed = urlparse (url )
3937 if parsed .scheme == "http" or insecure_tls :
4038 ssl_context : ssl .SSLContext | bool = False
@@ -45,9 +43,16 @@ async def _download():
4543 async with aiohttp .ClientSession (connector = connector ) as session :
4644 async with session .get (url ) as response :
4745 response .raise_for_status ()
48- return await response .read ()
46+ with tempfile .NamedTemporaryFile (delete = False , dir = "/var/tmp" ) as f :
47+ async for chunk in response .content .iter_chunked (65536 ):
48+ f .write (chunk )
49+ return Path (f .name )
4950
50- return self .portal .call (_download )
51+ tmp_path = self .portal .call (_download )
52+ try :
53+ storage .write_from_path (filename , tmp_path )
54+ finally :
55+ tmp_path .unlink ()
5156
5257 def _upload_file_if_needed (
5358 self , file_path : str , operator : Operator | None = None , insecure_tls : bool = False
@@ -59,12 +64,11 @@ def _upload_file_if_needed(
5964 parsed = urlparse (file_path )
6065 is_insecure_http = parsed .scheme == "http"
6166
62- # usse aiohttp for: http:// URLs, or https:// with insecure_tls
67+ # use aiohttp for: http:// URLs, or https:// with insecure_tls
6368 if is_insecure_http or insecure_tls :
6469 filename = Path (parsed .path ).name
6570 self .logger .info (f"Downloading { file_path } to storage as { filename } " )
66- content = self ._download_http (file_path , insecure_tls = insecure_tls )
67- self .storage .write_bytes (filename , content )
71+ self ._download_http_to_storage (file_path , self .storage , filename , insecure_tls = insecure_tls )
6872 return filename
6973
7074 # use opendal for local files, https:// (secure), and other schemes
0 commit comments