# Fix multiple download #5

**Merged** · 5 commits · Oct 18, 2024
## README.md (26 changes: 18 additions & 8 deletions)
````diff
@@ -51,6 +51,8 @@ fr.download(url)
 - `--filename` (optional): The name to save the downloaded file. Defaults to filename from URL.
 - `--max_files` (optional): The number of concurrent file chunks. Defaults to 10.
 - `--chunk_size` (optional): The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB).
+- `--headers` (optional): A dictionary of headers to include in the download request.
+- `--show_progress` (optional): Whether to show a progress bar. Defaults to True for single file downloads, and False for multiple files.
````
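
These two options mirror parameters on the Python `download` API that this PR extends. A minimal sketch of using them together (hypothetical URL and placeholder token; the call signature follows the updated `download` method in the `firerequests/main.py` diff below):

```python
from firerequests import FireRequests

fr = FireRequests()

# Hypothetical URL and placeholder token, shown only to illustrate the new options.
fr.download(
    "https://example.com/archive.iso",
    headers={"Authorization": "Bearer <token>"},  # sent with each chunk request
    show_progress=False,  # silence the bar even for a single-file download
)
```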

## Real-World Speed Test 🏎️

````diff
@@ -75,17 +77,19 @@ Downloading on 🔥: 100%|██████████| 3.42G/3.42G [02:38<00:
 ```python
 from firerequests import FireRequests
 
-url = "https://example.com/largefile.iso"
-filename = "largefile.iso"
+urls = ["https://example.com/file1.iso", "https://example.com/file2.iso"]
+filenames = ["file1.iso", "file2.iso"]
 
 fr = FireRequests()
-fr.download(url, filename, max_files=10, chunk_size=2 * 1024 * 1024)
+fr.download(urls, filenames, max_files=10, chunk_size=2 * 1024 * 1024, headers={"Authorization": "Bearer token"}, show_progress=True)
 ```
 
-- **`url`**: The URL of the file to download.
-- **`filename`**: The local filename to save the downloaded file.
-- **`max_files`**: The maximum number of concurrent chunk downloads.
-- **`chunk_size`**: The size of each chunk in bytes.
+- **`urls`**: The URL or list of URLs of the file(s) to download.
+- **`filenames`**: The filename(s) to save the downloaded file(s). If not provided, filenames are extracted from the URLs.
+- **`max_files`**: The maximum number of concurrent chunk downloads. Defaults to 10.
+- **`chunk_size`**: The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB).
+- **`headers`**: A dictionary of headers to include in the download request (optional).
+- **`show_progress`**: Whether to show a progress bar during download. Defaults to `True` for a single file, and `False` for multiple files (optional).
````
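
Because `urls` accepts either a single string or a list, the old single-file call style keeps working; a minimal sketch (hypothetical URL), relying on the filename being derived from the URL:

```python
from firerequests import FireRequests

fr = FireRequests()

# A bare string is wrapped into a one-element list internally; the filename
# defaults to the URL basename ("largefile.iso"), and show_progress resolves
# to True because exactly one file is requested.
fr.download("https://example.com/largefile.iso")
```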

### Uploading Files

````diff
@@ -96,9 +100,15 @@ file_path = "largefile.iso"
 parts_urls = ["https://example.com/upload_part1", "https://example.com/upload_part2", ...]
 
 fr = FireRequests()
-fr.upload(file_path, parts_urls, chunk_size=2 * 1024 * 1024, max_files=10)
+fr.upload(file_path, parts_urls, chunk_size=2 * 1024 * 1024, max_files=10, show_progress=True)
 ```
 
+- **`file_path`**: The local path to the file to upload.
+- **`parts_urls`**: A list of URLs where each part of the file will be uploaded.
+- **`chunk_size`**: The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB).
+- **`max_files`**: The maximum number of concurrent chunk uploads. Defaults to 10.
+- **`show_progress`**: Whether to show a progress bar during upload. Defaults to `True`.
+
 ### Comparing Download Speed
````
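
How the file is split across `parts_urls` follows from the arithmetic in `upload_file` (see the `firerequests/main.py` diff below): every part gets `file_size // len(parts_urls)` bytes, and the last part absorbs the remainder. A worked sketch of just that rule, with made-up numbers:

```python
# Assumed example values; the sizing rule itself comes from upload_file below.
file_size = 10 * 1024 * 1024 + 3                          # 10 MiB plus 3 stray bytes
num_parts = 4                                             # one part per upload URL

part_size = file_size // num_parts                        # 2621440 bytes
last_part_size = file_size - part_size * (num_parts - 1)  # 2621443 bytes

# Every part except the last is part_size; the last one picks up the remainder.
assert part_size * (num_parts - 1) + last_part_size == file_size
```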
## firerequests/main.py (84 changes: 56 additions & 28 deletions)
````diff
@@ -13,7 +13,7 @@
 from tqdm.asyncio import tqdm
 from functools import partial
 from concurrent.futures import ThreadPoolExecutor
-from typing import Dict, Any, List, Optional
+from typing import Union, Dict, Any, List, Optional
 
 # Enable nested event loops for environments like Jupyter
 nest_asyncio.apply()
````
````diff
@@ -49,7 +49,7 @@ async def download_chunk(
 
     async def download_file(
         self, url: str, filename: str, max_files: int, chunk_size: int, headers: Optional[Dict[str, str]] = None,
-        parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None
+        parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None, show_progress: bool = True
     ):
         headers = headers or {"User-Agent": "Wget/1.21.2", "Accept": "*/*", "Accept-Encoding": "identity", "Connection": "Keep-Alive"}
         try:
````
````diff
@@ -82,14 +82,20 @@ async def download_file(
                     tasks.append(self.download_chunk_with_retries(
                         session, url, filename, start, stop, headers, semaphore, parallel_failures, max_retries
                     ))
 
-                progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Downloading on 🔥")
+                if show_progress:
+                    progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Downloading on 🔥")
+
                 for chunk_result in asyncio.as_completed(tasks):
                     downloaded = await chunk_result
-                    progress_bar.update(downloaded)
+                    if show_progress:
+                        progress_bar.update(downloaded)
                     if callback:
                         await callback(downloaded)
-                progress_bar.close()
+
+                if show_progress:
+                    progress_bar.close()
 
         except Exception as e:
             print(f"Error in download_file: {e}")
````

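With the bar now optional, the existing `callback` hook is the natural channel for custom progress reporting when `show_progress=False`. A sketch of driving the coroutine directly (hypothetical URL; the public `download` wrapper does not expose `callback`, so this assumes calling `download_file` itself):

```python
import asyncio
from firerequests import FireRequests

async def on_chunk(num_bytes: int) -> None:
    # Invoked once per completed chunk with the byte count it contributed.
    print(f"finished a chunk: {num_bytes} bytes")

fr = FireRequests()
asyncio.run(fr.download_file(
    "https://example.com/largefile.iso", "largefile.iso",
    max_files=10, chunk_size=2 * 1024 * 1024,
    show_progress=False, callback=on_chunk,
))
```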
````diff
@@ -111,38 +117,41 @@ async def download_chunk_with_retries(
 
     async def upload_file(
         self, file_path: str, parts_urls: List[str], chunk_size: int, max_files: int,
-        parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None
+        parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None, show_progress: bool = True
     ):
         file_size = os.path.getsize(file_path)
         part_size = file_size // len(parts_urls)
         last_part_size = file_size - part_size * (len(parts_urls) - 1) # To handle any remaining bytes
 
         semaphore = asyncio.Semaphore(max_files)
         tasks = []
         try:
             async with aiohttp.ClientSession() as session:
                 for part_number, part_url in enumerate(parts_urls):
                     # Calculate start and stop positions for each part
-                    if part_number == len(parts_urls) - 1: # For the last part, ensure we include the remaining bytes
+                    if part_number == len(parts_urls) - 1:
                         start = part_number * part_size
                         size = last_part_size
                     else:
                         start = part_number * part_size
                         size = part_size
 
-                    # Start uploading the chunks for the given part
                     tasks.append(self.upload_chunk_with_retries(
                         session, part_url, file_path, start, size, chunk_size, semaphore, parallel_failures, max_retries
                     ))
 
-                # Track progress using a progress bar
-                progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Uploading on 🔥")
+                if show_progress:
+                    progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Uploading on 🔥")
+
                 for chunk_result in asyncio.as_completed(tasks):
                     uploaded = await chunk_result
-                    progress_bar.update(uploaded)
+                    if show_progress:
+                        progress_bar.update(uploaded)
                     if callback:
                         await callback(uploaded)
-                progress_bar.close()
+
+                if show_progress:
+                    progress_bar.close()
 
         except Exception as e:
             print(f"Error in upload_file: {e}")
````

````diff
@@ -185,25 +194,43 @@ async def upload_chunks(
             print(f"Error in upload_chunks: {e}")
             return 0
 
-    def download(self, url: str, filename: Optional[str] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024):
+    def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, List[str]]] = None, headers: Optional[Dict[str, str]] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024, show_progress: Optional[bool] = None):
         """
-        Downloads a file from a given URL asynchronously in chunks, with support for parallel downloads.
+        Downloads files from a given URL or a list of URLs asynchronously in chunks, with support for parallel downloads.
 
         Args:
-            url (str): The URL of the file to download.
-            filename (Optional[str]): The name of the file to save locally. If not provided, it will be extracted from the URL.
+            urls (Union[str, List[str]]): The URL or list of URLs of the files to download.
+            filenames (Optional[Union[str, List[str]]]): The filename or list of filenames to save locally.
+                If not provided, filenames will be extracted from the URLs.
+            headers (Optional[Dict[str, str]]): Headers to include in the download requests.
             max_files (int): The maximum number of concurrent file download chunks. Defaults to 10.
             chunk_size (int): The size of each chunk to download, in bytes. Defaults to 2MB.
+            show_progress (Optional[bool]): Whether to show a progress bar. Defaults to True for single file, False for multiple files.
 
         Usage:
-        - This function downloads the file in parallel chunks, speeding up the process.
+        - This function downloads the files in parallel chunks, speeding up the process.
         """
-        # Extract filename from URL if not provided
-        if filename is None:
-            filename = os.path.basename(urlparse(url).path)
-        asyncio.run(self.download_file(url, filename, max_files, chunk_size))
+        if isinstance(urls, str):
+            urls = [urls]
+        if isinstance(filenames, str):
+            filenames = [filenames]
+
+        if filenames is None:
+            filenames = [os.path.basename(urlparse(url).path) for url in urls]
+        elif len(filenames) != len(urls):
+            raise ValueError("The number of filenames must match the number of URLs")
+
+        # Set default for show_progress based on whether it's a single file or list
+        if show_progress is None:
+            show_progress = len(urls) == 1
+
+        async def download_all():
+            tasks = [self.download_file(url, filename, max_files, chunk_size, headers, show_progress=show_progress) for url, filename in zip(urls, filenames)]
+            await asyncio.gather(*tasks)
+
+        asyncio.run(download_all())
````
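
With that wrapper, a list input fans out into one `download_file` task per URL, and the tasks are gathered concurrently. A short usage sketch (hypothetical URLs), letting both the filenames and the progress default fall out automatically:

```python
from firerequests import FireRequests

fr = FireRequests()

# Saves file1.iso and file2.iso (the URL basenames); show_progress resolves
# to False here because more than one URL was passed and no override was given.
fr.download([
    "https://example.com/file1.iso",
    "https://example.com/file2.iso",
])
```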

````diff
-    def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 1024 * 1024, max_files: int = 10):
+    def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 1024 * 1024, max_files: int = 10, show_progress: Optional[bool] = True):
         """
         Uploads a file to multiple URLs in chunks asynchronously, with support for parallel uploads.
 
@@ -212,11 +239,12 @@ def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 10
             parts_urls (List[str]): A list of URLs where each part of the file will be uploaded.
             chunk_size (int): The size of each chunk to upload, in bytes. Defaults to 2MB.
             max_files (int): The maximum number of concurrent file upload chunks. Defaults to 10.
+            show_progress (bool): Whether to show a progress bar during upload. Defaults to True.
 
         Usage:
         - The function divides the file into smaller chunks and uploads them in parallel to different URLs.
         """
-        asyncio.run(self.upload_file(file_path, parts_urls, chunk_size, max_files))
+        asyncio.run(self.upload_file(file_path, parts_urls, chunk_size, max_files, show_progress=show_progress))
 
     def normal_download(self, url: str, filename: str):
         response = requests.get(url, stream=True)
````
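
And the matching quiet variant on the upload side; a minimal sketch (hypothetical part URLs), e.g. for scripted runs where a progress bar would clutter the logs:

```python
from firerequests import FireRequests

fr = FireRequests()

# Hypothetical part URLs; one slice of the file is sent to each.
parts_urls = [
    "https://example.com/upload_part1",
    "https://example.com/upload_part2",
]
fr.upload("largefile.iso", parts_urls, show_progress=False)
```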