From 6e98a79a27bb6f72d7d6405cff43784d64241b9a Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Mon, 23 Sep 2024 05:05:46 +0530 Subject: [PATCH 01/50] Update main.py --- firerequests/main.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index 14a5ccc..22f6b46 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -5,6 +5,8 @@ import time import requests import nest_asyncio +import argparse +from urllib.parse import urlparse from aiohttp import ClientSession from aiofiles.os import remove from tqdm.asyncio import tqdm @@ -181,9 +183,21 @@ def compare_speed(self, url: str, filename: str): except Exception as e: print(f"Error in compare_speed: {e}") +def main(): + parser = argparse.ArgumentParser(description="FireRequests CLI") + parser.add_argument("url", type=str, help="The URL to download the file from") + parser.add_argument("--filename", type=str, help="The filename to save the download") + parser.add_argument("--max_files", type=int, default=10, help="The number of concurrent file chunks") + parser.add_argument("--chunk_size", type=int, default=2 * 1024 * 1024, help="The size of each chunk in bytes") + + args = parser.parse_args() + + # Extract filename from URL if not provided + if not args.filename: + args.filename = os.path.basename(urlparse(args.url).path) -if __name__ == "__main__": - url = "https://mirror.clarkson.edu/zorinos/isos/17/Zorin-OS-17.2-Core-64-bit.iso" - filename = "Zorin-OS-17.2-Core-64-bit.iso" fr = FireRequests() - fr.compare_speed(url, filename) + fr.download(args.url, args.filename, args.max_files, args.chunk_size) + +if __name__ == "__main__": + main() From 0e5bbac24ecf1e98b7317c821949a091323a8c8c Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Mon, 23 Sep 2024 05:07:13 +0530 Subject: [PATCH 02/50] Update setup.py --- setup.py | 8 ++++++-- 1 
file changed, 6 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 25f3d5f..173759e 100644 --- a/setup.py +++ b/setup.py @@ -47,6 +47,10 @@ "Operating System :: OS Independent", ], python_requires=">=3.9", - # entry_points={"console_scripts": ["firerequests = firerequests.main:coming_soon"]}, + entry_points={ + "console_scripts": [ + "firerequests=firerequests.main:main", + ], + }, install_requires=requirements, -) \ No newline at end of file +) From 4bc7154593a9841e6b658322af3a81d3b7b66ab5 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Mon, 23 Sep 2024 05:24:36 +0530 Subject: [PATCH 03/50] Update README.md --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index 300012a..2037546 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,8 @@ pip install firerequests Accelerate your downloads with just a few lines of code: +### Python Usage + ```python from firerequests import FireRequests @@ -34,6 +36,18 @@ fr = FireRequests() fr.download(url, filename) ``` +### Command Line Interface + +```bash +!firerequests https://mirror.clarkson.edu/zorinos/isos/17/Zorin-OS-17.2-Core-64-bit.iso --filename Zorin-OS-17.2-Core-64-bit.iso +``` + +#### Parameters: +- `url` (required): The URL to download the file from. +- `--filename` (optional): The name to save the downloaded file. Defaults to filename from URL. +- `--max_files` (optional): The number of concurrent file chunks. Defaults to 10. +- `--chunk_size` (optional): The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB). + ## Real-World Speed Test 🏎️ FireRequests delivers significant performance improvements over traditional download methods. 
Below is the result of a real-world speed test: From fadde72a3eaf45ee4fb2bae391c7fb6525499b14 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Mon, 23 Sep 2024 09:35:53 +0530 Subject: [PATCH 04/50] Update main.py --- firerequests/main.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index 22f6b46..d388e20 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -53,10 +53,13 @@ async def download_file( headers = headers or {} try: async with aiohttp.ClientSession() as session: - async with session.head(url) as resp: + # First, follow any redirects and get the final download URL + async with session.head(url, allow_redirects=True) as resp: + if resp.status in [301, 302]: + url = str(resp.url) # The final resolved URL after redirection file_size = int(resp.headers['Content-Length']) chunks = range(0, file_size, chunk_size) - + # Create an empty file async with aiofiles.open(filename, "wb") as f: await f.seek(file_size - 1) @@ -69,7 +72,7 @@ async def download_file( tasks.append(self.download_chunk_with_retries( session, url, filename, start, stop, headers, semaphore, parallel_failures, max_retries )) - + progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Downloading on 🔥") for chunk_result in asyncio.as_completed(tasks): downloaded = await chunk_result From da1b03c222b30e2be32b5ff1aaa71be89c3b05b9 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Mon, 23 Sep 2024 09:50:44 +0530 Subject: [PATCH 05/50] Update requirements.txt --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 019561e..0d5d2d6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ aiohttp aiofiles requests nest_asyncio -tqdm \ No newline at end of file +tqdm +httpx From db998c769ff6ba1dee3a756693cbd15cdc8c9a81 Mon
Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Mon, 23 Sep 2024 19:48:20 +0530 Subject: [PATCH 06/50] Update main.py --- firerequests/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firerequests/main.py b/firerequests/main.py index d388e20..61c2897 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -164,7 +164,7 @@ def normal_download(self, url: str, filename: str): total_size = int(response.headers.get('content-length', 0)) progress_bar = tqdm(total=total_size, unit="B", unit_scale=True, desc="Normal Download 🐌") with open(filename, 'wb') as f: - for data in response.iter_content(1024): + for data in response.iter_content(2 * 1024 * 1024): progress_bar.update(len(data)) f.write(data) progress_bar.close() From 6b5fbe7da851d54705a7b4c523ecca9c548cc708 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Mon, 23 Sep 2024 20:21:22 +0530 Subject: [PATCH 07/50] Update main.py --- firerequests/main.py | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index 61c2897..4e49b80 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -5,7 +5,8 @@ import time import requests import nest_asyncio -import argparse +# import argparse +import fire from urllib.parse import urlparse from aiohttp import ClientSession from aiofiles.os import remove @@ -153,7 +154,10 @@ async def upload_chunk( except Exception as e: print(f"Error in upload_chunk: {e}") - def download(self, url: str, filename: str, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024): + def download(self, url: str, filename: Optional[str] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024): + # Extract filename from URL if not provided + if filename is None: + filename = os.path.basename(urlparse(url).path) asyncio.run(self.download_file(url, filename, max_files, chunk_size))
def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 1024 * 1024, max_files: int = 10): @@ -169,7 +173,9 @@ def normal_download(self, url: str, filename: str): f.write(data) progress_bar.close() - def compare_speed(self, url: str, filename: str): + def compare_speed(self, url: str, filename: Optional[str] = None): + if filename is None: + filename = os.path.basename(urlparse(url).path) try: start_time = time.time() self.normal_download(url, filename) @@ -186,21 +192,5 @@ def compare_speed(self, url: str, filename: str): except Exception as e: print(f"Error in compare_speed: {e}") -def main(): - parser = argparse.ArgumentParser(description="FireRequests CLI") - parser.add_argument("url", type=str, help="The URL to download the file from") - parser.add_argument("--filename", type=str, help="The filename to save the download") - parser.add_argument("--max_files", type=int, default=10, help="The number of concurrent file chunks") - parser.add_argument("--chunk_size", type=int, default=2 * 1024 * 1024, help="The size of each chunk in bytes") - - args = parser.parse_args() - - # Extract filename from URL if not provided - if not args.filename: - args.filename = os.path.basename(urlparse(args.url).path) - - fr = FireRequests() - fr.download(args.url, args.filename, args.max_files, args.chunk_size) - if __name__ == "__main__": - main() + fire.Fire(FireRequests) From 649ef83d8a2f59cb605a245d4ea99467b4b989d6 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Mon, 23 Sep 2024 20:30:29 +0530 Subject: [PATCH 08/50] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 0d5d2d6..cc1222a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ requests nest_asyncio tqdm httpx +fire From 833aafab0d34e475571f6c1f90f22d704c988036 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya 
<44090649+rishiraj@users.noreply.github.com> Date: Mon, 23 Sep 2024 20:32:39 +0530 Subject: [PATCH 09/50] Update main.py --- firerequests/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firerequests/main.py b/firerequests/main.py index 4e49b80..2139cb3 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -192,5 +192,5 @@ def compare_speed(self, url: str, filename: Optional[str] = None): except Exception as e: print(f"Error in compare_speed: {e}") -if __name__ == "__main__": +def main(): fire.Fire(FireRequests) From ecaa1cb06e35acf754a126ab7d7a179c98ae0839 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Mon, 23 Sep 2024 20:37:20 +0530 Subject: [PATCH 10/50] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 173759e..f057aa2 100644 --- a/setup.py +++ b/setup.py @@ -49,7 +49,7 @@ python_requires=">=3.9", entry_points={ "console_scripts": [ - "firerequests=firerequests.main:main", + "fr=firerequests.main:main", ], }, install_requires=requirements, From 519ff730587d04366d724fad208c1a010e6c3d89 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Mon, 23 Sep 2024 20:45:07 +0530 Subject: [PATCH 11/50] Update README.md --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2037546..588c81d 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ fr.download(url, filename) ### Command Line Interface ```bash -!firerequests https://mirror.clarkson.edu/zorinos/isos/17/Zorin-OS-17.2-Core-64-bit.iso --filename Zorin-OS-17.2-Core-64-bit.iso +!fr download https://mirror.clarkson.edu/zorinos/isos/17/Zorin-OS-17.2-Core-64-bit.iso ``` #### Parameters: @@ -60,6 +60,13 @@ Downloading on 🔥: 100%|██████████| 3.42G/3.42G [03:29<00: 🔥 Download Time: 209.41 seconds ``` + + +For Hugging Face Hub downloads
it is recommended to use `hf_transfer` for maximum speed gains! +For more details, please take a look at this [section](https://huggingface.co/docs/huggingface_hub/hf_transfer). + + + ## Advanced Usage ⚙️ ### Downloading Files From 17b15976e7dc044024d588a2940bf26c14d3a0d3 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Tue, 24 Sep 2024 00:56:45 +0530 Subject: [PATCH 12/50] Update README.md --- README.md | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 588c81d..638868f 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ fr.download(url, filename) ### Command Line Interface ```bash -!fr download https://mirror.clarkson.edu/zorinos/isos/17/Zorin-OS-17.2-Core-64-bit.iso +fr download https://mirror.clarkson.edu/zorinos/isos/17/Zorin-OS-17.2-Core-64-bit.iso ``` #### Parameters: @@ -53,19 +53,16 @@ fr.download(url, filename) FireRequests delivers significant performance improvements over traditional download methods. Below is the result of a real-world speed test: ```plaintext -Normal Download 🐌: 100%|██████████| 3.42G/3.42G [18:26<00:00, 3.09MB/s] -Downloading on 🔥: 100%|██████████| 3.42G/3.42G [03:29<00:00, 16.4MB/s] +Normal Download 🐌: 100%|██████████| 3.42G/3.42G [18:24<00:00, 3.10MB/s] +Downloading on 🔥: 100%|██████████| 3.42G/3.42G [02:38<00:00, 21.6MB/s] -🐌 Download Time: 1107.32 seconds -🔥 Download Time: 209.41 seconds +🐌 Download Time: 1104.84 seconds +🔥 Download Time: 158.22 seconds ``` - - -For Hugging Face Hub downloads it is recommended to use `hf_transfer` for maximum speed gains! -For more details, please take a look at this [section](https://huggingface.co/docs/huggingface_hub/hf_transfer). - - - +> [!TIP] +> For Hugging Face Hub downloads it is recommended to use `hf_transfer` for maximum speed gains!
+> For more details, please take a look at this [section](https://huggingface.co/docs/huggingface_hub/hf_transfer). ## Advanced Usage ⚙️ @@ -107,7 +104,7 @@ url = "https://example.com/largefile.iso" filename = "largefile.iso" fr = FireRequests() -fr.compare_speed(url, filename) +fr.compare(url, filename) ``` ## License 📄 From 3cfdc1f235b1e28e5c273ef649aee30485355fbb Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Tue, 24 Sep 2024 00:58:46 +0530 Subject: [PATCH 13/50] simplified public apis --- firerequests/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index 2139cb3..349518b 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -173,7 +173,7 @@ def normal_download(self, url: str, filename: str): f.write(data) progress_bar.close() - def compare_speed(self, url: str, filename: Optional[str] = None): + def compare(self, url: str, filename: Optional[str] = None): if filename is None: filename = os.path.basename(urlparse(url).path) try: @@ -190,7 +190,7 @@ def compare_speed(self, url: str, filename: Optional[str] = None): print(f"\n🐌 Download Time: {normal_time:.2f} seconds") print(f"🔥 Download Time: {fire_time:.2f} seconds\n") except Exception as e: - print(f"Error in compare_speed: {e}") + print(f"Error in compare: {e}") def main(): fire.Fire(FireRequests) From 52f77f1a2642107216250f5b9929ff7cbc59fe5a Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Tue, 24 Sep 2024 02:48:53 +0530 Subject: [PATCH 14/50] Update main.py --- firerequests/main.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index 349518b..eaaf6af 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -5,7 +5,7 @@ import time import requests import nest_asyncio -# import argparse +import socket import fire from
urllib.parse import urlparse from aiohttp import ClientSession @@ -53,12 +53,27 @@ async def download_file( ): headers = headers or {} try: + # Resolve the domain name and get IP address + parsed_url = urlparse(url) + ip_address = socket.gethostbyname(parsed_url.hostname) + async with aiohttp.ClientSession() as session: - # First, follow any redirects and get the final download URL + # Follow redirects and get the final download URL async with session.head(url, allow_redirects=True) as resp: if resp.status in [301, 302]: - url = str(resp.url) # The final resolved URL after redirection + url = str(resp.url) # Final URL after redirection + + # Print wget-like headers + print(f"--{time.strftime('%Y-%m-%d %H:%M:%S')}-- {url}") + print(f"Resolving {parsed_url.hostname} ({parsed_url.hostname})... {ip_address}") + print(f"Connecting to {parsed_url.hostname} ({ip_address})... connected.") + print(f"HTTP request sent, awaiting response... {resp.status} {resp.reason}") + file_size = int(resp.headers['Content-Length']) + content_type = resp.headers.get('Content-Type', 'application/octet-stream') + print(f"Length: {file_size} ({file_size // (1024 * 1024):.1f}M) [{content_type}]") + print(f"Saving to: '{filename}'\n") + chunks = range(0, file_size, chunk_size) # Create an empty file From 1f51aabfc1d76bc36ebae05282970593d602aec0 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Tue, 24 Sep 2024 03:03:34 +0530 Subject: [PATCH 15/50] Update main.py --- firerequests/main.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index eaaf6af..7ed043e 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -60,14 +60,21 @@ async def download_file( async with aiohttp.ClientSession() as session: # Follow redirects and get the final download URL async with session.head(url, allow_redirects=True) as resp: - if resp.status in [301, 302]: - url = str(resp.url) # 
Final URL after redirection - # Print wget-like headers print(f"--{time.strftime('%Y-%m-%d %H:%M:%S')}-- {url}") print(f"Resolving {parsed_url.hostname} ({parsed_url.hostname})... {ip_address}") print(f"Connecting to {parsed_url.hostname} ({ip_address})... connected.") print(f"HTTP request sent, awaiting response... {resp.status} {resp.reason}") + + if resp.status in [301, 302]: + url = str(resp.url) # Final URL after redirection + print(f"Location: {url}") + parsed_url = urlparse(url) + ip_address = socket.gethostbyname(parsed_url.hostname) + print(f"--{time.strftime('%Y-%m-%d %H:%M:%S')}-- {url}") + print(f"Resolving {parsed_url.hostname} ({parsed_url.hostname})... {ip_address}") + print(f"Connecting to {parsed_url.hostname} ({ip_address})... connected.") + print(f"HTTP request sent, awaiting response... {resp.status} {resp.reason}") file_size = int(resp.headers['Content-Length']) content_type = resp.headers.get('Content-Type', 'application/octet-stream') From 2393eb4770e43fb98637605afc8501c7991d5bae Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Tue, 24 Sep 2024 03:27:21 +0530 Subject: [PATCH 16/50] Update main.py --- firerequests/main.py | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index 7ed043e..e9de3fd 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -53,34 +53,22 @@ async def download_file( ): headers = headers or {} try: - # Resolve the domain name and get IP address - parsed_url = urlparse(url) - ip_address = socket.gethostbyname(parsed_url.hostname) - async with aiohttp.ClientSession() as session: # Follow redirects and get the final download URL async with session.head(url, allow_redirects=True) as resp: + # Resolve the domain name and get IP address + url = str(resp.url) + parsed_url = urlparse(url) + ip_address = socket.gethostbyname(parsed_url.hostname) # Print wget-like headers 
print(f"--{time.strftime('%Y-%m-%d %H:%M:%S')}-- {url}") print(f"Resolving {parsed_url.hostname} ({parsed_url.hostname})... {ip_address}") print(f"Connecting to {parsed_url.hostname} ({ip_address})... connected.") print(f"HTTP request sent, awaiting response... {resp.status} {resp.reason}") - - if resp.status in [301, 302]: - url = str(resp.url) # Final URL after redirection - print(f"Location: {url}") - parsed_url = urlparse(url) - ip_address = socket.gethostbyname(parsed_url.hostname) - print(f"--{time.strftime('%Y-%m-%d %H:%M:%S')}-- {url}") - print(f"Resolving {parsed_url.hostname} ({parsed_url.hostname})... {ip_address}") - print(f"Connecting to {parsed_url.hostname} ({ip_address})... connected.") - print(f"HTTP request sent, awaiting response... {resp.status} {resp.reason}") - file_size = int(resp.headers['Content-Length']) content_type = resp.headers.get('Content-Type', 'application/octet-stream') print(f"Length: {file_size} ({file_size // (1024 * 1024):.1f}M) [{content_type}]") print(f"Saving to: '{filename}'\n") - chunks = range(0, file_size, chunk_size) # Create an empty file From a3f891be55408699a0f8302058c37a9a8bcf3940 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Tue, 24 Sep 2024 03:48:36 +0530 Subject: [PATCH 17/50] Update main.py --- firerequests/main.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index e9de3fd..c631f7e 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -58,12 +58,11 @@ async def download_file( async with session.head(url, allow_redirects=True) as resp: # Resolve the domain name and get IP address url = str(resp.url) + print(f"--{time.strftime('%Y-%m-%d %H:%M:%S')}-- {url}") parsed_url = urlparse(url) ip_address = socket.gethostbyname(parsed_url.hostname) - # Print wget-like headers - print(f"--{time.strftime('%Y-%m-%d %H:%M:%S')}-- {url}") print(f"Resolving {parsed_url.hostname} 
({parsed_url.hostname})... {ip_address}") - print(f"Connecting to {parsed_url.hostname} ({ip_address})... connected.") + print(f"Connecting to {parsed_url.hostname} ({parsed_url.hostname})|{ip_address}|:443... connected.") print(f"HTTP request sent, awaiting response... {resp.status} {resp.reason}") file_size = int(resp.headers['Content-Length']) content_type = resp.headers.get('Content-Type', 'application/octet-stream') From e4e181af66e80c1d49fe60a48ee18910b593bca6 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Tue, 24 Sep 2024 03:55:09 +0530 Subject: [PATCH 18/50] Update main.py --- firerequests/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index c631f7e..f2d9ece 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -66,8 +66,8 @@ async def download_file( print(f"HTTP request sent, awaiting response... {resp.status} {resp.reason}") file_size = int(resp.headers['Content-Length']) content_type = resp.headers.get('Content-Type', 'application/octet-stream') - print(f"Length: {file_size} ({file_size // (1024 * 1024):.1f}M) [{content_type}]") - print(f"Saving to: '{filename}'\n") + print(f"Length: {file_size} ({file_size / (1024 * 1024 * 1024):.1f}G) [{content_type}]") + print(f"Saving to: ‘{filename}’\n") chunks = range(0, file_size, chunk_size) # Create an empty file From 1d45edf7b90f297c1ba247f77bead63b67ca6ed0 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Tue, 24 Sep 2024 04:02:21 +0530 Subject: [PATCH 19/50] Update README.md --- README.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 638868f..2f631df 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Install FireRequests using pip: ```bash -pip install firerequests +!pip install firerequests ``` ## Quick Start 🏁 @@ -30,16 +30,15 @@ Accelerate your downloads
with just a few lines of code: from firerequests import FireRequests url = "https://mirror.clarkson.edu/zorinos/isos/17/Zorin-OS-17.2-Core-64-bit.iso" -filename = "Zorin-OS-17.2-Core-64-bit.iso" fr = FireRequests() -fr.download(url, filename) +fr.download(url) ``` ### Command Line Interface ```bash -fr download https://mirror.clarkson.edu/zorinos/isos/17/Zorin-OS-17.2-Core-64-bit.iso +!fr download https://mirror.clarkson.edu/zorinos/isos/17/Zorin-OS-17.2-Core-64-bit.iso ``` #### Parameters: @@ -101,10 +100,9 @@ fr.upload(file_path, parts_urls, chunk_size=2 * 1024 * 1024, max_files=10) from firerequests import FireRequests url = "https://example.com/largefile.iso" -filename = "largefile.iso" fr = FireRequests() -fr.compare(url, filename) +fr.compare(url) ``` ## License 📄 From 3eec834efbfdeba8ef71c104fa7d94a3f76cd03a Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Wed, 25 Sep 2024 00:39:12 +0530 Subject: [PATCH 20/50] Update README.md --- README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 2f631df..0e21e9d 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ # FireRequests 🔥 -[![PyPI version](https://img.shields.io/pypi/v/firerequests.svg)](https://pypi.org/project/firerequests/) [![License](https://img.shields.io/pypi/l/firerequests.svg)](https://github.com/rishiraj/firerequests/blob/main/LICENSE) [![Python version](https://img.shields.io/pypi/pyversions/firerequests.svg)](https://pypi.org/project/firerequests/) +

+ GitHub release + PyPi version + PyPI - Downloads +

**FireRequests** is a high-performance, asynchronous HTTP client library for Python, engineered to accelerate your file transfers. By harnessing advanced concepts like semaphores, exponential backoff with jitter, concurrency, and fault tolerance, FireRequests can achieve up to a **6x real-world speedup** in file downloads and uploads compared to traditional synchronous methods. From 1361b24dab9fb8a8cf8d49c077b995725ef5ed91 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 10 Oct 2024 01:55:14 +0530 Subject: [PATCH 21/50] Update main.py --- firerequests/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index f2d9ece..cae5d5c 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -51,11 +51,11 @@ async def download_file( self, url: str, filename: str, max_files: int, chunk_size: int, headers: Optional[Dict[str, str]] = None, parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None ): - headers = headers or {} + headers = headers or {"User-Agent": "Wget/1.21.2", "Accept": "*/*", "Accept-Encoding": "identity", "Connection": "Keep-Alive"} try: async with aiohttp.ClientSession() as session: # Follow redirects and get the final download URL - async with session.head(url, allow_redirects=True) as resp: + async with session.head(url, headers=headers, allow_redirects=True) as resp: # Resolve the domain name and get IP address url = str(resp.url) print(f"--{time.strftime('%Y-%m-%d %H:%M:%S')}-- {url}") From e1c2b99898efdcd007fbaee1d4f009a95ef486e4 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 10 Oct 2024 02:06:12 +0530 Subject: [PATCH 22/50] Update main.py --- firerequests/main.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/firerequests/main.py b/firerequests/main.py index cae5d5c..cbaf1ed 100644 --- 
a/firerequests/main.py +++ b/firerequests/main.py @@ -164,12 +164,38 @@ async def upload_chunk( print(f"Error in upload_chunk: {e}") def download(self, url: str, filename: Optional[str] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024): + """ + Downloads a file from a given URL asynchronously in chunks, with support for parallel downloads. + + Args: + url (str): The URL of the file to download. + filename (Optional[str]): The name of the file to save locally. If not provided, it will be extracted from the URL. + max_files (int): The maximum number of concurrent file download chunks. Defaults to 10. + chunk_size (int): The size of each chunk to download, in bytes. Defaults to 2MB. + + Usage: + - This function downloads the file in parallel chunks, speeding up the process. + - It manages asynchronous downloading using asyncio, making multiple requests in parallel. + """ # Extract filename from URL if not provided if filename is None: filename = os.path.basename(urlparse(url).path) asyncio.run(self.download_file(url, filename, max_files, chunk_size)) def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 1024 * 1024, max_files: int = 10): + """ + Uploads a file to multiple URLs in chunks asynchronously, with support for parallel uploads. + + Args: + file_path (str): The local path to the file to upload. + parts_urls (List[str]): A list of URLs where each part of the file will be uploaded. + chunk_size (int): The size of each chunk to upload, in bytes. Defaults to 2MB. + max_files (int): The maximum number of concurrent file upload chunks. Defaults to 10. + + Usage: + - The function divides the file into smaller chunks and uploads them in parallel to different URLs. + - It supports retries and failure handling through exponential backoff for improved robustness. 
+ """ asyncio.run(self.upload_file(file_path, parts_urls, chunk_size, max_files)) def normal_download(self, url: str, filename: str): @@ -183,6 +209,18 @@ def normal_download(self, url: str, filename: str): progress_bar.close() def compare(self, url: str, filename: Optional[str] = None): + """ + Compares the time taken to download a file using both the normal (synchronous) method and the asynchronous method. + + Args: + url (str): The URL of the file to download. + filename (Optional[str]): The name of the file to save locally. If not provided, it will be extracted from the URL. + + Usage: + - The function first downloads the file using the traditional `requests` method and measures the time taken. + - It then downloads the same file using the asynchronous method and measures the time. + - Finally, it prints a comparison of both download times. + """ if filename is None: filename = os.path.basename(urlparse(url).path) try: From 012fbe08c9c1b82df7b2ac9ceef8d8b4b6ec4bfd Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 10 Oct 2024 02:33:09 +0530 Subject: [PATCH 23/50] Update main.py --- firerequests/main.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index cbaf1ed..e061b6e 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -175,7 +175,6 @@ def download(self, url: str, filename: Optional[str] = None, max_files: int = 10 Usage: - This function downloads the file in parallel chunks, speeding up the process. - - It manages asynchronous downloading using asyncio, making multiple requests in parallel. """ # Extract filename from URL if not provided if filename is None: @@ -194,7 +193,6 @@ def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 10 Usage: - The function divides the file into smaller chunks and uploads them in parallel to different URLs. 
- - It supports retries and failure handling through exponential backoff for improved robustness. """ asyncio.run(self.upload_file(file_path, parts_urls, chunk_size, max_files)) @@ -218,7 +216,7 @@ def compare(self, url: str, filename: Optional[str] = None): Usage: - The function first downloads the file using the traditional `requests` method and measures the time taken. - - It then downloads the same file using the asynchronous method and measures the time. + - It then downloads the same file using the asynchronous `firerequests` method and measures the time taken. - Finally, it prints a comparison of both download times. """ if filename is None: From a1842f93904b87e2e6510970d4b122a6c052fedb Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 10 Oct 2024 02:48:35 +0530 Subject: [PATCH 24/50] Create FUNDING.yml --- .github/FUNDING.yml | 1 + 1 file changed, 1 insertion(+) create mode 100644 .github/FUNDING.yml diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 0000000..3c700c0 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1 @@ +open_collective: firerequests From e3b7bf915c902fe42a970149ffe9423445cff906 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 10 Oct 2024 03:13:41 +0530 Subject: [PATCH 25/50] Update FUNDING.yml --- .github/FUNDING.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index 3c700c0..130e82b 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1 +1,2 @@ +custom: upi://pay?pa=sexy@axisb&pn=RISHIRAJ ACHARYA open_collective: firerequests From d7f016d5f78e0d591233fdd493f485719c8be0ed Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 10 Oct 2024 03:18:46 +0530 Subject: [PATCH 26/50] Update FUNDING.yml --- .github/FUNDING.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/FUNDING.yml b/.github/FUNDING.yml index 130e82b..3bab6aa 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,2 +1,2 @@ -custom: upi://pay?pa=sexy@axisb&pn=RISHIRAJ ACHARYA +custom: ["upi://pay?pa=sexy@axisb&pn=Rishiraj%20Acharya"] open_collective: firerequests From 3e61c982284d5938698a395ed25bc2a793aca251 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 10 Oct 2024 03:21:40 +0530 Subject: [PATCH 27/50] Update FUNDING.yml --- .github/FUNDING.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index 3bab6aa..2564cfc 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,2 +1,2 @@ -custom: ["upi://pay?pa=sexy@axisb&pn=Rishiraj%20Acharya"] +custom: upi://pay?pa=sexy@axisb&pn=Rishiraj%20Acharya open_collective: firerequests From d098b5e306ea46404d2c9e2f54bb3c8553889f8c Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 10 Oct 2024 03:53:56 +0530 Subject: [PATCH 28/50] Update README.md --- README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/README.md b/README.md index 0e21e9d..6fb4889 100644 --- a/README.md +++ b/README.md @@ -112,3 +112,15 @@ fr.compare(url) ## License 📄 This project is licensed under the Apache License 2.0 - see the [LICENSE](https://github.com/rishiraj/firerequests/blob/main/LICENSE) file for details. + +Sponsors +-------- +Become a sponsor and get a logo here. The funds are used to defray the cost of development.
+ +## Open Source Collective sponsors +[![Backers on Open Collective](https://opencollective.com/firerequests/backers/badge.svg)](#backers) + +### Backers +[[Become a backer](https://opencollective.com/firerequests#backer)] + + From 0367e671ab54ba731110cdc700fa38f851e85856 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 10 Oct 2024 04:18:13 +0530 Subject: [PATCH 29/50] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6fb4889..f1991c4 100644 --- a/README.md +++ b/README.md @@ -113,7 +113,7 @@ fr.compare(url) This project is licensed under the Apache License 2.0 - see the [LICENSE](https://github.com/rishiraj/firerequests/blob/main/LICENSE) file for details. -Sponsors +Sponsors ❤️ -------- Become a sponsor and get a logo here. The funds are used to defray the cost of development. From 2703d8c5439d8313064ea3d6c846f3f535a5d9ce Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:38:05 +0530 Subject: [PATCH 30/50] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index f1991c4..b0680dd 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ GitHub release PyPi version PyPI - Downloads + Open In Colab

**FireRequests** is a high-performance, asynchronous HTTP client library for Python, engineered to accelerate your file transfers. By harnessing advanced concepts like semaphores, exponential backoff with jitter, concurrency, and fault tolerance, FireRequests can achieve up to a **6x real-world speedup** in file downloads and uploads compared to traditional synchronous methods. From e3cc4655f2ddfee9d733209ed6b378d97ab3b6f0 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 10 Oct 2024 11:39:02 +0530 Subject: [PATCH 31/50] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b0680dd..f8c8230 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Open In Colab

-**FireRequests** is a high-performance, asynchronous HTTP client library for Python, engineered to accelerate your file transfers. By harnessing advanced concepts like semaphores, exponential backoff with jitter, concurrency, and fault tolerance, FireRequests can achieve up to a **6x real-world speedup** in file downloads and uploads compared to traditional synchronous methods. +**FireRequests** is a high-performance, asynchronous HTTP client library for Python, engineered to accelerate your file transfers. By harnessing advanced concepts like semaphores, exponential backoff with jitter, concurrency, and fault tolerance, FireRequests can achieve up to a **10x real-world speedup** in file downloads and uploads compared to traditional synchronous methods. ## Features πŸš€ From 9c3550f130ba27baf2d13a93b860a45e4e4fc205 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Fri, 11 Oct 2024 01:55:57 +0530 Subject: [PATCH 32/50] Update main.py --- firerequests/main.py | 52 +++++++++++++++++++++++++++++++------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index e061b6e..f78fda8 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -114,16 +114,28 @@ async def upload_file( parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None ): file_size = os.path.getsize(file_path) - tasks = [] + part_size = file_size // len(parts_urls) + last_part_size = file_size - part_size * (len(parts_urls) - 1) # To handle any remaining bytes + semaphore = asyncio.Semaphore(max_files) + tasks = [] try: async with aiohttp.ClientSession() as session: for part_number, part_url in enumerate(parts_urls): - start = part_number * chunk_size + # Calculate start and stop positions for each part + if part_number == len(parts_urls) - 1: # For the last part, ensure we include the remaining bytes + start = part_number * part_size + size = last_part_size + else: + 
start = part_number * part_size + size = part_size + + # Start uploading the chunks for the given part tasks.append(self.upload_chunk_with_retries( - session, part_url, file_path, start, chunk_size, semaphore, parallel_failures, max_retries + session, part_url, file_path, start, size, chunk_size, semaphore, parallel_failures, max_retries )) - + + # Track progress using a progress bar progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Uploading on πŸ”₯") for chunk_result in asyncio.as_completed(tasks): uploaded = await chunk_result @@ -135,33 +147,43 @@ async def upload_file( print(f"Error in upload_file: {e}") async def upload_chunk_with_retries( - self, session: ClientSession, url: str, file_path: str, start: int, chunk_size: int, + self, session: ClientSession, url: str, file_path: str, start: int, part_size: int, chunk_size: int, semaphore: asyncio.Semaphore, parallel_failures: int, max_retries: int ): async with semaphore: attempt = 0 while attempt <= max_retries: try: - return await self.upload_chunk(session, url, file_path, start, chunk_size) + # Adjust chunk upload for each part + return await self.upload_chunks(session, url, file_path, start, part_size, chunk_size) except Exception as e: if attempt == max_retries: raise e await asyncio.sleep(self.exponential_backoff(BASE_WAIT_TIME, attempt, MAX_WAIT_TIME)) attempt += 1 - async def upload_chunk( - self, session: ClientSession, url: str, file_path: str, start: int, chunk_size: int + async def upload_chunks( + self, session: ClientSession, url: str, file_path: str, start: int, part_size: int, chunk_size: int ): try: + # Upload in smaller chunks within each part range + total_uploaded = 0 async with aiofiles.open(file_path, 'rb') as f: - await f.seek(start) - chunk = await f.read(chunk_size) - headers = {'Content-Length': str(len(chunk))} - async with session.put(url, data=chunk, headers=headers) as response: - response.raise_for_status() - return len(chunk) + while total_uploaded < 
part_size: + await f.seek(start + total_uploaded) + chunk = await f.read(min(chunk_size, part_size - total_uploaded)) + if not chunk: + break + + headers = {'Content-Length': str(len(chunk))} + async with session.put(url, data=chunk, headers=headers) as response: + response.raise_for_status() + + total_uploaded += len(chunk) + return total_uploaded except Exception as e: - print(f"Error in upload_chunk: {e}") + print(f"Error in upload_chunks: {e}") + return 0 def download(self, url: str, filename: Optional[str] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024): """ From 7aee77ed2a41e52be611644e19f8b347a92fb039 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Fri, 11 Oct 2024 06:40:15 +0530 Subject: [PATCH 33/50] Update FUNDING.yml --- .github/FUNDING.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml index 2564cfc..f81c3b4 100644 --- a/.github/FUNDING.yml +++ b/.github/FUNDING.yml @@ -1,2 +1 @@ -custom: upi://pay?pa=sexy@axisb&pn=Rishiraj%20Acharya -open_collective: firerequests +buy_me_a_coffee: rishiraj From f4a1cfe1c3081eccd539c0a2746a7bee5caad4b7 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Fri, 11 Oct 2024 06:53:53 +0530 Subject: [PATCH 34/50] Update README.md --- README.md | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/README.md b/README.md index f8c8230..1be5de8 100644 --- a/README.md +++ b/README.md @@ -118,10 +118,4 @@ Sponsors ❀️ -------- Become a sponsor and get a logo here. The funds are used to defray the cost of development. 
-## Open Source Collective sponsors -[![Backers on Open Collective](https://opencollective.com/firerequests/backers/badge.svg)](#backers) - -### Backers -[[Become a backer](https://opencollective.com/firerequests#backer)] - - + From 2ef4f7da9cc42f987a217b5ac48cc980d6b78696 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Fri, 11 Oct 2024 07:09:28 +0530 Subject: [PATCH 35/50] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1be5de8..472dd18 100644 --- a/README.md +++ b/README.md @@ -118,4 +118,4 @@ Sponsors ❤️ -------- Become a sponsor and get a logo here. The funds are used to defray the cost of development. - +bmc-button From d2b2ff63e3be908f29d9bd66cccde824aaa417ee Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Fri, 11 Oct 2024 08:39:22 +0530 Subject: [PATCH 36/50] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 472dd18..24c5942 100644 --- a/README.md +++ b/README.md @@ -116,6 +116,8 @@ This project is licensed under the Apache License 2.0 - see the [LICENSE](https: Sponsors ❤️ -------- + + Become a sponsor and get a logo here. The funds are used to defray the cost of development.
bmc-button From 760558fd73a55faa9ece67bc1811de35dccf0092 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:41:27 +0530 Subject: [PATCH 37/50] Update main.py --- firerequests/main.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index f78fda8..55e9c81 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -185,23 +185,35 @@ async def upload_chunks( print(f"Error in upload_chunks: {e}") return 0 - def download(self, url: str, filename: Optional[str] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024): + def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, List[str]]] = None, headers: Optional[Dict[str, str]] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024): """ - Downloads a file from a given URL asynchronously in chunks, with support for parallel downloads. + Downloads files from a given URL or a list of URLs asynchronously in chunks, with support for parallel downloads. Args: - url (str): The URL of the file to download. - filename (Optional[str]): The name of the file to save locally. If not provided, it will be extracted from the URL. + urls (Union[str, List[str]]): The URL or list of URLs of the files to download. + filenames (Optional[Union[str, List[str]]]): The filename or list of filenames to save locally. + If not provided, filenames will be extracted from the URLs. max_files (int): The maximum number of concurrent file download chunks. Defaults to 10. chunk_size (int): The size of each chunk to download, in bytes. Defaults to 2MB. Usage: - - This function downloads the file in parallel chunks, speeding up the process. + - This function downloads the files in parallel chunks, speeding up the process. 
""" - # Extract filename from URL if not provided - if filename is None: - filename = os.path.basename(urlparse(url).path) - asyncio.run(self.download_file(url, filename, max_files, chunk_size)) + if isinstance(urls, str): + urls = [urls] + if isinstance(filenames, str): + filenames = [filenames] + + if filenames is None: + filenames = [os.path.basename(urlparse(url).path) for url in urls] + elif len(filenames) != len(urls): + raise ValueError("The number of filenames must match the number of URLs") + + async def download_all(): + tasks = [self.download_file(url, filename, max_files, chunk_size, headers) for url, filename in zip(urls, filenames)] + await asyncio.gather(*tasks) + + asyncio.run(download_all()) def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 1024 * 1024, max_files: int = 10): """ From 5423d886e17fe803150bdaa4c30a10938fd6a4f2 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Fri, 18 Oct 2024 13:45:34 +0530 Subject: [PATCH 38/50] Update main.py --- firerequests/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/firerequests/main.py b/firerequests/main.py index 55e9c81..6d80d34 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -13,7 +13,7 @@ from tqdm.asyncio import tqdm from functools import partial from concurrent.futures import ThreadPoolExecutor -from typing import Dict, Any, List, Optional +from typing import Union, Dict, Any, List, Optional # Enable nested event loops for environments like Jupyter nest_asyncio.apply() From 6b07e9c8d3a07eafbc4e5d6087008e83620bd2c7 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Fri, 18 Oct 2024 14:25:57 +0530 Subject: [PATCH 39/50] Update main.py --- firerequests/main.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index 6d80d34..ec4c268 100644 --- 
a/firerequests/main.py +++ b/firerequests/main.py @@ -49,7 +49,7 @@ async def download_chunk( async def download_file( self, url: str, filename: str, max_files: int, chunk_size: int, headers: Optional[Dict[str, str]] = None, - parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None + parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None, show_progress: bool = True ): headers = headers or {"User-Agent": "Wget/1.21.2", "Accept": "*/*", "Accept-Encoding": "identity", "Connection": "Keep-Alive"} try: @@ -82,14 +82,22 @@ async def download_file( tasks.append(self.download_chunk_with_retries( session, url, filename, start, stop, headers, semaphore, parallel_failures, max_retries )) - - progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Downloading on πŸ”₯") + + if show_progress: + progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Downloading on πŸ”₯") + else: + progress_bar = None + for chunk_result in asyncio.as_completed(tasks): downloaded = await chunk_result - progress_bar.update(downloaded) + if progress_bar: + progress_bar.update(downloaded) if callback: await callback(downloaded) - progress_bar.close() + + if progress_bar: + progress_bar.close() + except Exception as e: print(f"Error in download_file: {e}") @@ -185,7 +193,7 @@ async def upload_chunks( print(f"Error in upload_chunks: {e}") return 0 - def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, List[str]]] = None, headers: Optional[Dict[str, str]] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024): + def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, List[str]]] = None, headers: Optional[Dict[str, str]] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024, show_progress: bool = True): """ Downloads files from a given URL or a list of URLs asynchronously in chunks, with support for parallel downloads. 
@@ -193,8 +201,10 @@ def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, L urls (Union[str, List[str]]): The URL or list of URLs of the files to download. filenames (Optional[Union[str, List[str]]]): The filename or list of filenames to save locally. If not provided, filenames will be extracted from the URLs. + headers (Optional[Dict[str, str]]): Headers to include in the download requests. max_files (int): The maximum number of concurrent file download chunks. Defaults to 10. chunk_size (int): The size of each chunk to download, in bytes. Defaults to 2MB. + show_progress (bool): Whether to show a progress bar. Defaults to True. Usage: - This function downloads the files in parallel chunks, speeding up the process. @@ -210,7 +220,7 @@ def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, L raise ValueError("The number of filenames must match the number of URLs") async def download_all(): - tasks = [self.download_file(url, filename, max_files, chunk_size, headers) for url, filename in zip(urls, filenames)] + tasks = [self.download_file(url, filename, max_files, chunk_size, headers, show_progress=show_progress) for url, filename in zip(urls, filenames)] await asyncio.gather(*tasks) asyncio.run(download_all()) From 32782a1cabb224b0842eec28f50c115e7888a658 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:03:55 +0530 Subject: [PATCH 40/50] Update main.py --- firerequests/main.py | 48 +++++++++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index ec4c268..c91bccb 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -85,17 +85,15 @@ async def download_file( if show_progress: progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Downloading on πŸ”₯") - else: - progress_bar = None for chunk_result in asyncio.as_completed(tasks): 
downloaded = await chunk_result - if progress_bar: + if show_progress: progress_bar.update(downloaded) if callback: await callback(downloaded) - if progress_bar: + if show_progress: progress_bar.close() except Exception as e: @@ -119,38 +117,41 @@ async def download_chunk_with_retries( async def upload_file( self, file_path: str, parts_urls: List[str], chunk_size: int, max_files: int, - parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None + parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None, show_progress: bool = True ): file_size = os.path.getsize(file_path) part_size = file_size // len(parts_urls) last_part_size = file_size - part_size * (len(parts_urls) - 1) # To handle any remaining bytes - + semaphore = asyncio.Semaphore(max_files) tasks = [] try: async with aiohttp.ClientSession() as session: for part_number, part_url in enumerate(parts_urls): - # Calculate start and stop positions for each part - if part_number == len(parts_urls) - 1: # For the last part, ensure we include the remaining bytes + if part_number == len(parts_urls) - 1: start = part_number * part_size size = last_part_size else: start = part_number * part_size size = part_size - - # Start uploading the chunks for the given part + tasks.append(self.upload_chunk_with_retries( session, part_url, file_path, start, size, chunk_size, semaphore, parallel_failures, max_retries )) - - # Track progress using a progress bar - progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Uploading on πŸ”₯") + + if show_progress: + progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Uploading on πŸ”₯") + for chunk_result in asyncio.as_completed(tasks): uploaded = await chunk_result - progress_bar.update(uploaded) + if show_progress: + progress_bar.update(uploaded) if callback: await callback(uploaded) - progress_bar.close() + + if show_progress: + progress_bar.close() + except Exception as e: print(f"Error in upload_file: 
{e}") @@ -193,7 +194,7 @@ async def upload_chunks( print(f"Error in upload_chunks: {e}") return 0 - def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, List[str]]] = None, headers: Optional[Dict[str, str]] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024, show_progress: bool = True): + def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, List[str]]] = None, headers: Optional[Dict[str, str]] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024, show_progress: Optional[bool] = None): """ Downloads files from a given URL or a list of URLs asynchronously in chunks, with support for parallel downloads. @@ -204,7 +205,7 @@ def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, L headers (Optional[Dict[str, str]]): Headers to include in the download requests. max_files (int): The maximum number of concurrent file download chunks. Defaults to 10. chunk_size (int): The size of each chunk to download, in bytes. Defaults to 2MB. - show_progress (bool): Whether to show a progress bar. Defaults to True. + show_progress (Optional[bool]): Whether to show a progress bar. Defaults to True for single file, False for multiple files. Usage: - This function downloads the files in parallel chunks, speeding up the process. 
@@ -213,19 +214,23 @@ def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, L urls = [urls] if isinstance(filenames, str): filenames = [filenames] - + if filenames is None: filenames = [os.path.basename(urlparse(url).path) for url in urls] elif len(filenames) != len(urls): raise ValueError("The number of filenames must match the number of URLs") - + + # Set default for show_progress based on whether it's a single file or list + if show_progress is None: + show_progress = len(urls) == 1 + async def download_all(): tasks = [self.download_file(url, filename, max_files, chunk_size, headers, show_progress=show_progress) for url, filename in zip(urls, filenames)] await asyncio.gather(*tasks) asyncio.run(download_all()) - def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 1024 * 1024, max_files: int = 10): + def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 1024 * 1024, max_files: int = 10, show_progress: Optional[bool] = True): """ Uploads a file to multiple URLs in chunks asynchronously, with support for parallel uploads. @@ -234,11 +239,12 @@ def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 10 parts_urls (List[str]): A list of URLs where each part of the file will be uploaded. chunk_size (int): The size of each chunk to upload, in bytes. Defaults to 2MB. max_files (int): The maximum number of concurrent file upload chunks. Defaults to 10. + show_progress (bool): Whether to show a progress bar during upload. Defaults to True. Usage: - The function divides the file into smaller chunks and uploads them in parallel to different URLs. 
""" - asyncio.run(self.upload_file(file_path, parts_urls, chunk_size, max_files)) + asyncio.run(self.upload_file(file_path, parts_urls, chunk_size, max_files, show_progress=show_progress)) def normal_download(self, url: str, filename: str): response = requests.get(url, stream=True) From 03941b6a21a45a733e41938f9544309b12a6162a Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Fri, 18 Oct 2024 15:17:35 +0530 Subject: [PATCH 41/50] Update README.md --- README.md | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 24c5942..23575f8 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,8 @@ fr.download(url) - `--filename` (optional): The name to save the downloaded file. Defaults to filename from URL. - `--max_files` (optional): The number of concurrent file chunks. Defaults to 10. - `--chunk_size` (optional): The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB). +- `--headers` (optional): A dictionary of headers to include in the download request. +- `--show_progress` (optional): Whether to show a progress bar. Defaults to True for single file downloads, and False for multiple files. ## Real-World Speed Test 🏎️ @@ -75,17 +77,19 @@ Downloading on πŸ”₯: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 3.42G/3.42G [02:38<00: ```python from firerequests import FireRequests -url = "https://example.com/largefile.iso" -filename = "largefile.iso" +urls = ["https://example.com/file1.iso", "https://example.com/file2.iso"] +filenames = ["file1.iso", "file2.iso"] fr = FireRequests() -fr.download(url, filename, max_files=10, chunk_size=2 * 1024 * 1024) +fr.download(urls, filenames, max_files=10, chunk_size=2 * 1024 * 1024, headers={"Authorization": "Bearer token"}, show_progress=True) ``` -- **`url`**: The URL of the file to download. -- **`filename`**: The local filename to save the downloaded file. 
-- **`max_files`**: The maximum number of concurrent chunk downloads. -- **`chunk_size`**: The size of each chunk in bytes. +- **`urls`**: The URL or list of URLs of the file(s) to download. +- **`filenames`**: The filename(s) to save the downloaded file(s). If not provided, filenames are extracted from the URLs. +- **`max_files`**: The maximum number of concurrent chunk downloads. Defaults to 10. +- **`chunk_size`**: The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB). +- **`headers`**: A dictionary of headers to include in the download request (optional). +- **`show_progress`**: Whether to show a progress bar during download. Defaults to `True` for a single file, and `False` for multiple files (optional). ### Uploading Files @@ -96,9 +100,15 @@ file_path = "largefile.iso" parts_urls = ["https://example.com/upload_part1", "https://example.com/upload_part2", ...] fr = FireRequests() -fr.upload(file_path, parts_urls, chunk_size=2 * 1024 * 1024, max_files=10) +fr.upload(file_path, parts_urls, chunk_size=2 * 1024 * 1024, max_files=10, show_progress=True) ``` +- **`file_path`**: The local path to the file to upload. +- **`parts_urls`**: A list of URLs where each part of the file will be uploaded. +- **`chunk_size`**: The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB). +- **`max_files`**: The maximum number of concurrent chunk uploads. Defaults to 10. +- **`show_progress`**: Whether to show a progress bar during upload. Defaults to `True`. 
+ ### Comparing Download Speed ```python From 5e81e7cb9849b72eaac36dbd5fec1b03510e3572 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Fri, 1 Nov 2024 01:55:32 +0530 Subject: [PATCH 42/50] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 23575f8..411161c 100644 --- a/README.md +++ b/README.md @@ -47,8 +47,8 @@ fr.download(url) ``` #### Parameters: -- `url` (required): The URL to download the file from. -- `--filename` (optional): The name to save the downloaded file. Defaults to filename from URL. +- `urls` (required): The URL to download the file from. +- `--filenames` (optional): The name to save the downloaded file. Defaults to filename from URL. - `--max_files` (optional): The number of concurrent file chunks. Defaults to 10. - `--chunk_size` (optional): The size of each chunk in bytes. Defaults to `2 * 1024 * 1024` (2 MB). - `--headers` (optional): A dictionary of headers to include in the download request. From 79711ed42a30c39b0a992c6ec15897531438682a Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Mon, 4 Nov 2024 15:58:37 +0530 Subject: [PATCH 43/50] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 411161c..43e27d4 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@

GitHub release PyPi version - PyPI - Downloads + PyPI Downloads Open In Colab

From dc8258cdb80c52be7f165fcfb78ef8b0f572197c Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Wed, 6 Nov 2024 13:41:45 +0530 Subject: [PATCH 44/50] add generate feature --- README.md | 32 ++++++++++++++++++- firerequests/main.py | 75 ++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 3 +- setup.py | 2 +- 4 files changed, 109 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 43e27d4..b49b132 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Open In Colab

-**FireRequests** is a high-performance, asynchronous HTTP client library for Python, engineered to accelerate your file transfers. By harnessing advanced concepts like semaphores, exponential backoff with jitter, concurrency, and fault tolerance, FireRequests can achieve up to a **10x real-world speedup** in file downloads and uploads compared to traditional synchronous methods. +**FireRequests** is a high-performance, asynchronous HTTP client library for Python, engineered to accelerate your file transfers. By harnessing advanced concepts like semaphores, exponential backoff with jitter, concurrency, and fault tolerance, FireRequests can achieve up to a **10x real-world speedup** in file downloads and uploads compared to traditional synchronous methods and enables scalable, parallelized LLM interactions with providers like OpenAI and Google. ## Features πŸš€ @@ -15,6 +15,7 @@ - **Concurrent Transfers**: Uses `asyncio.Semaphore` to limit simultaneous tasks, optimizing performance by managing system resources effectively. - **Fault Tolerance**: Retries failed tasks with exponentially increasing wait times, adding random jitter to prevent network congestion. - **Chunked Processing**: Files are split into configurable chunks for parallel processing, significantly accelerating uploads/downloads. +- **Parallelized LLM Generation**: Efficiently handles large-scale language model requests from OpenAI and Google with configurable parallelism. - **Compatibility**: Supports environments like Jupyter through `nest_asyncio`, enabling reusable `asyncio` loops for both batch and interactive Jupyter use. ## Installation πŸ“¦ @@ -120,6 +121,35 @@ fr = FireRequests() fr.compare(url) ``` +### Generating Text with LLMs + +FireRequests supports generating responses from LLMs like OpenAI’s and Google’s generative models in parallel batches. 
+ +```python +from firerequests import FireRequests + +# Initialize FireRequests +fr = FireRequests() + +# Set parameters +provider = "openai" +model = "gpt-4o-mini" +system_prompt = "Provide concise answers." +user_prompts = ["What is AI?", "Explain quantum computing.", "What is Bitcoin?", "Explain neural networks."] +parallel_requests = 2 + +# Generate responses +responses = fr.generate( + provider=provider, + model=model, + system_prompt=system_prompt, + user_prompts=user_prompts, + parallel_requests=parallel_requests +) + +print(responses) +``` + ## License πŸ“„ This project is licensed under the Apache License 2.0 - see the [LICENSE](https://github.com/rishiraj/firerequests/blob/main/LICENSE) file for details. diff --git a/firerequests/main.py b/firerequests/main.py index c91bccb..aaa3ca6 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -287,5 +287,80 @@ def compare(self, url: str, filename: Optional[str] = None): except Exception as e: print(f"Error in compare: {e}") + async def call_openai(self, model: str, system_prompt: str, user_prompt: str) -> str: + from openai import OpenAI + client = OpenAI(api_key=os.environ["OPENAI_API_KEY"]) + completion = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt} + ] + ) + return completion.choices[0].message.content + + async def call_google(self, model: str, system_prompt: str, user_prompt: str) -> str: + import google.generativeai as genai + genai.configure(api_key=os.environ["GEMINI_API_KEY"]) + + generation_config = { + "temperature": 1, + "top_p": 0.95, + "top_k": 40, + "max_output_tokens": 8192, + "response_mime_type": "text/plain", + } + + model_instance = genai.GenerativeModel( + model_name=model, + generation_config=generation_config, + system_instruction=system_prompt, + ) + + chat_session = model_instance.start_chat(history=[]) + response = chat_session.send_message(user_prompt) + return 
response.text + + async def generate_batch( + self, provider: str, model: str, system_prompt: str, user_prompts: List[str] + ) -> List[str]: + tasks = [] + for user_prompt in user_prompts: + if provider.lower() == "openai": + tasks.append(self.call_openai(model, system_prompt, user_prompt)) + elif provider.lower() == "google": + tasks.append(self.call_google(model, system_prompt, user_prompt)) + else: + raise ValueError("Unsupported provider. Choose either 'openai' or 'google'.") + + responses = await asyncio.gather(*tasks) + return responses + + def generate( + self, provider: str, model: str, system_prompt: str, user_prompts: List[str], parallel_requests: int = 10 + ) -> List[str]: + """ + Generates responses for the given list of user prompts in parallel batches. + + Args: + provider (str): The API provider to use, either "openai" or "google". + model (str): The model to use for generating responses. + system_prompt (str): The system message prompt to include in each request. + user_prompts (List[str]): List of user messages for generation. + parallel_requests (int): Number of parallel requests to make. + + Returns: + List[str]: List of generated responses corresponding to each user prompt. 
+ """ + async def generate_all(): + all_responses = [] + for i in range(0, len(user_prompts), parallel_requests): + batch_prompts = user_prompts[i:i + parallel_requests] + batch_responses = await self.generate_batch(provider, model, system_prompt, batch_prompts) + all_responses.extend(batch_responses) + return all_responses + + return self.loop.run_until_complete(generate_all()) + def main(): fire.Fire(FireRequests) diff --git a/requirements.txt b/requirements.txt index cc1222a..b3d331d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,6 @@ aiofiles requests nest_asyncio tqdm -httpx fire +google-generativeai +openai \ No newline at end of file diff --git a/setup.py b/setup.py index f057aa2..8ea834a 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", ], - python_requires=">=3.9", + python_requires=">=3.8", entry_points={ "console_scripts": [ "fr=firerequests.main:main", From a8a652c3486f35a2454f5ae032a7af53f3a4f553 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Wed, 6 Nov 2024 15:41:22 +0530 Subject: [PATCH 45/50] Update main.py --- firerequests/main.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index aaa3ca6..2f9c8ca 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -287,7 +287,7 @@ def compare(self, url: str, filename: Optional[str] = None): except Exception as e: print(f"Error in compare: {e}") - async def call_openai(self, model: str, system_prompt: str, user_prompt: str) -> str: + def call_openai_sync(self, model: str, system_prompt: str, user_prompt: str) -> str: from openai import OpenAI client = OpenAI(api_key=os.environ["OPENAI_API_KEY"]) completion = client.chat.completions.create( @@ -299,7 +299,10 @@ async def call_openai(self, model: str, system_prompt: str, user_prompt: str) -> ) return 
completion.choices[0].message.content - async def call_google(self, model: str, system_prompt: str, user_prompt: str) -> str: + async def call_openai(self, model: str, system_prompt: str, user_prompt: str) -> str: + return await asyncio.to_thread(self.call_openai_sync, model, system_prompt, user_prompt) + + def call_google_sync(self, model: str, system_prompt: str, user_prompt: str) -> str: import google.generativeai as genai genai.configure(api_key=os.environ["GEMINI_API_KEY"]) @@ -321,6 +324,9 @@ async def call_google(self, model: str, system_prompt: str, user_prompt: str) -> response = chat_session.send_message(user_prompt) return response.text + async def call_google(self, model: str, system_prompt: str, user_prompt: str) -> str: + return await asyncio.to_thread(self.call_google_sync, model, system_prompt, user_prompt) + async def generate_batch( self, provider: str, model: str, system_prompt: str, user_prompts: List[str] ) -> List[str]: From 33c23295bd49bc21b81b453c41c6394e60477d24 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 7 Nov 2024 07:08:34 +0530 Subject: [PATCH 46/50] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index b49b132..cc76ad9 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ - **Concurrent Transfers**: Uses `asyncio.Semaphore` to limit simultaneous tasks, optimizing performance by managing system resources effectively. - **Fault Tolerance**: Retries failed tasks with exponentially increasing wait times, adding random jitter to prevent network congestion. - **Chunked Processing**: Files are split into configurable chunks for parallel processing, significantly accelerating uploads/downloads. -- **Parallelized LLM Generation**: Efficiently handles large-scale language model requests from OpenAI and Google with configurable parallelism. 
+- **Parallel LLM Call**: Efficiently handles large-scale language model requests from OpenAI and Google with configurable parallelism. - **Compatibility**: Supports environments like Jupyter through `nest_asyncio`, enabling reusable `asyncio` loops for both batch and interactive Jupyter use. ## Installation 📦 @@ -123,7 +123,7 @@ fr.compare(url) ### Generating Text with LLMs -FireRequests supports generating responses from LLMs like OpenAI’s and Google’s generative models in parallel batches. +FireRequests supports generating responses from LLMs like OpenAI’s and Google’s generative models in parallel batches. This currently doesn't work in Colab. ```python from firerequests import FireRequests From 34036768d4a12da955bab20cfa42c70196662c89 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Thu, 7 Nov 2024 07:08:55 +0530 Subject: [PATCH 47/50] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cc76ad9..cf213d7 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ - **Concurrent Transfers**: Uses `asyncio.Semaphore` to limit simultaneous tasks, optimizing performance by managing system resources effectively. - **Fault Tolerance**: Retries failed tasks with exponentially increasing wait times, adding random jitter to prevent network congestion. - **Chunked Processing**: Files are split into configurable chunks for parallel processing, significantly accelerating uploads/downloads. -- **Parallel LLM Call**: Efficiently handles large-scale language model requests from OpenAI and Google with configurable parallelism. +- **Parallel LLM Calls**: Efficiently handles large-scale language model requests from OpenAI and Google with configurable parallelism. - **Compatibility**: Supports environments like Jupyter through `nest_asyncio`, enabling reusable `asyncio` loops for both batch and interactive Jupyter use. 
## Installation 📦 From 7a5713e6883dda5c4f31f875b6a9f0202fce9bf1 Mon Sep 17 00:00:00 2001 From: Soumik Rakshit <19soumik.rakshit96@gmail.com> Date: Wed, 19 Mar 2025 07:24:01 +0530 Subject: [PATCH 48/50] update: make FireRequests.download() return filenames --- firerequests/main.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index 2f9c8ca..eb14283 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -9,10 +9,7 @@ import fire from urllib.parse import urlparse from aiohttp import ClientSession -from aiofiles.os import remove from tqdm.asyncio import tqdm -from functools import partial -from concurrent.futures import ThreadPoolExecutor from typing import Union, Dict, Any, List, Optional # Enable nested event loops for environments like Jupyter @@ -194,7 +191,7 @@ async def upload_chunks( print(f"Error in upload_chunks: {e}") return 0 - def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, List[str]]] = None, headers: Optional[Dict[str, str]] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024, show_progress: Optional[bool] = None): + def download(self, urls: Union[str, List[str]], filenames: Optional[Union[str, List[str]]] = None, headers: Optional[Dict[str, str]] = None, max_files: int = 10, chunk_size: int = 2 * 1024 * 1024, show_progress: Optional[bool] = None) -> List[str]: """ Downloads files from a given URL or a list of URLs asynchronously in chunks, with support for parallel downloads. 
@@ -229,6 +226,8 @@ async def download_all(): await asyncio.gather(*tasks) asyncio.run(download_all()) + + return filenames def upload(self, file_path: str, parts_urls: List[str], chunk_size: int = 2 * 1024 * 1024, max_files: int = 10, show_progress: Optional[bool] = True): """ From 1befe2e9ad13b53c93f802d4cc983b8e14c920fc Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Fri, 11 Apr 2025 11:48:49 +0530 Subject: [PATCH 49/50] added progress_desc optional parameter --- firerequests/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/firerequests/main.py b/firerequests/main.py index eb14283..d507d22 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -46,7 +46,7 @@ async def download_chunk( async def download_file( self, url: str, filename: str, max_files: int, chunk_size: int, headers: Optional[Dict[str, str]] = None, - parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None, show_progress: bool = True + parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None, show_progress: bool = True, progress_desc: Optional[str] = None ): headers = headers or {"User-Agent": "Wget/1.21.2", "Accept": "*/*", "Accept-Encoding": "identity", "Connection": "Keep-Alive"} try: @@ -81,7 +81,7 @@ async def download_file( )) if show_progress: - progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Downloading on πŸ”₯") + progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc=progress_desc) for chunk_result in asyncio.as_completed(tasks): downloaded = await chunk_result @@ -114,7 +114,7 @@ async def download_chunk_with_retries( async def upload_file( self, file_path: str, parts_urls: List[str], chunk_size: int, max_files: int, - parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None, show_progress: bool = True + parallel_failures: int = 3, max_retries: int = 5, callback: Optional[Any] = None, 
show_progress: bool = True, progress_desc: Optional[str] = None ): file_size = os.path.getsize(file_path) part_size = file_size // len(parts_urls) @@ -137,7 +137,7 @@ async def upload_file( )) if show_progress: - progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc="Uploading on πŸ”₯") + progress_bar = tqdm(total=file_size, unit="B", unit_scale=True, desc=progress_desc) for chunk_result in asyncio.as_completed(tasks): uploaded = await chunk_result From 1a070fb2edad316f6bc8bd314ec70ff29e0c8127 Mon Sep 17 00:00:00 2001 From: Rishiraj Acharya <44090649+rishiraj@users.noreply.github.com> Date: Mon, 12 May 2025 11:08:49 +0530 Subject: [PATCH 50/50] remove llm provider dependency --- README.md | 42 ++++++++++-------- firerequests/main.py | 100 +++++++++++-------------------------------- requirements.txt | 4 +- 3 files changed, 52 insertions(+), 94 deletions(-) diff --git a/README.md b/README.md index cf213d7..8c70d8c 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ fr.compare(url) ### Generating Text with LLMs -FireRequests supports generating responses from LLMs like OpenAI’s and Google’s generative models in parallel batches. This currently doesn't work in Colab. +FireRequests allows you to run LLM API calls (like OpenAI or Google) in parallel batches using a decorator. This keeps the library lightweight and lets users supply their own logic for calling APIs. This approach currently doesn't work in Colab. ```python from firerequests import FireRequests @@ -131,22 +131,30 @@ from firerequests import FireRequests # Initialize FireRequests fr = FireRequests() -# Set parameters -provider = "openai" -model = "gpt-4o-mini" -system_prompt = "Provide concise answers." 
-user_prompts = ["What is AI?", "Explain quantum computing.", "What is Bitcoin?", "Explain neural networks."] -parallel_requests = 2 - -# Generate responses -responses = fr.generate( - provider=provider, - model=model, - system_prompt=system_prompt, - user_prompts=user_prompts, - parallel_requests=parallel_requests -) - +# Use the decorator to define your own prompt function +@fr.op(max_reqs=2, prompts=[ + "What is AI?", + "Explain quantum computing.", + "What is Bitcoin?", + "Explain neural networks." +]) +def generate(system: str = "Provide concise answers.", prompt: str = ""): + # You can use OpenAI, Google, or any other LLM API here + from openai import OpenAI + import os + + client = OpenAI(api_key=os.environ["OPENAI_API_KEY"]) + response = client.chat.completions.create( + model="gpt-4o-mini", + messages=[ + {"role": "system", "content": system}, + {"role": "user", "content": prompt} + ] + ) + return response.choices[0].message.content + +# Call your decorated function +responses = generate() print(responses) ``` diff --git a/firerequests/main.py b/firerequests/main.py index d507d22..e34422e 100644 --- a/firerequests/main.py +++ b/firerequests/main.py @@ -286,86 +286,38 @@ def compare(self, url: str, filename: Optional[str] = None): except Exception as e: print(f"Error in compare: {e}") - def call_openai_sync(self, model: str, system_prompt: str, user_prompt: str) -> str: - from openai import OpenAI - client = OpenAI(api_key=os.environ["OPENAI_API_KEY"]) - completion = client.chat.completions.create( - model=model, - messages=[ - {"role": "system", "content": system_prompt}, - {"role": "user", "content": user_prompt} - ] - ) - return completion.choices[0].message.content - - async def call_openai(self, model: str, system_prompt: str, user_prompt: str) -> str: - return await asyncio.to_thread(self.call_openai_sync, model, system_prompt, user_prompt) - - def call_google_sync(self, model: str, system_prompt: str, user_prompt: str) -> str: - import 
google.generativeai as genai - genai.configure(api_key=os.environ["GEMINI_API_KEY"]) - - generation_config = { - "temperature": 1, - "top_p": 0.95, - "top_k": 40, - "max_output_tokens": 8192, - "response_mime_type": "text/plain", - } - - model_instance = genai.GenerativeModel( - model_name=model, - generation_config=generation_config, - system_instruction=system_prompt, - ) - - chat_session = model_instance.start_chat(history=[]) - response = chat_session.send_message(user_prompt) - return response.text - - async def call_google(self, model: str, system_prompt: str, user_prompt: str) -> str: - return await asyncio.to_thread(self.call_google_sync, model, system_prompt, user_prompt) - - async def generate_batch( - self, provider: str, model: str, system_prompt: str, user_prompts: List[str] - ) -> List[str]: - tasks = [] - for user_prompt in user_prompts: - if provider.lower() == "openai": - tasks.append(self.call_openai(model, system_prompt, user_prompt)) - elif provider.lower() == "google": - tasks.append(self.call_google(model, system_prompt, user_prompt)) - else: - raise ValueError("Unsupported provider. Choose either 'openai' or 'google'.") - - responses = await asyncio.gather(*tasks) - return responses - - def generate( - self, provider: str, model: str, system_prompt: str, user_prompts: List[str], parallel_requests: int = 10 - ) -> List[str]: + def op(self, max_reqs: int = 10, prompts: Optional[List[str]] = None): """ - Generates responses for the given list of user prompts in parallel batches. + Decorator to parallelize a user-defined prompt function over a list of prompts. Args: - provider (str): The API provider to use, either "openai" or "google". - model (str): The model to use for generating responses. - system_prompt (str): The system message prompt to include in each request. - user_prompts (List[str]): List of user messages for generation. - parallel_requests (int): Number of parallel requests to make. 
+ max_reqs (int): Maximum number of parallel tasks. + prompts (List[str]): Prompts to process. Returns: - List[str]: List of generated responses corresponding to each user prompt. + Decorated function executed in parallel using asyncio. """ - async def generate_all(): - all_responses = [] - for i in range(0, len(user_prompts), parallel_requests): - batch_prompts = user_prompts[i:i + parallel_requests] - batch_responses = await self.generate_batch(provider, model, system_prompt, batch_prompts) - all_responses.extend(batch_responses) - return all_responses - - return self.loop.run_until_complete(generate_all()) + def decorator(func): + async def run_batch(prompts_batch): + tasks = [asyncio.to_thread(func, prompt=prompt) for prompt in prompts_batch] + return await asyncio.gather(*tasks) + + def wrapper(*args, **kwargs): + if prompts is None: + raise ValueError("You must pass a list of prompts to the decorator.") + results = [] + + async def run_all(): + for i in range(0, len(prompts), max_reqs): + batch = prompts[i:i + max_reqs] + batch_results = await run_batch(batch) + results.extend(batch_results) + return results + + return self.loop.run_until_complete(run_all()) + + return wrapper + return decorator def main(): fire.Fire(FireRequests) diff --git a/requirements.txt b/requirements.txt index b3d331d..54b01d9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,4 @@ aiofiles requests nest_asyncio tqdm -fire -google-generativeai -openai \ No newline at end of file +fire \ No newline at end of file